Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/sparse/_lil.py: 20%

290 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1"""List of Lists sparse matrix class 

2""" 

3 

4__docformat__ = "restructuredtext en" 

5 

6__all__ = ['lil_matrix', 'isspmatrix_lil'] 

7 

8from bisect import bisect_left 

9 

10import numpy as np 

11 

12from ._base import spmatrix, isspmatrix 

13from ._index import IndexMixin, INT_TYPES, _broadcast_arrays 

14from ._sputils import (getdtype, isshape, isscalarlike, upcast_scalar, 

15 get_index_dtype, check_shape, check_reshape_kwargs) 

16from . import _csparsetools 

17 

18 

19class lil_matrix(spmatrix, IndexMixin): 

20 """Row-based LIst of Lists sparse matrix 

21 

22 This is a structure for constructing sparse matrices incrementally. 

23 Note that inserting a single item can take linear time in the worst case; 

24 to construct a matrix efficiently, make sure the items are pre-sorted by 

25 index, per row. 

26 

27 This can be instantiated in several ways: 

28 lil_matrix(D) 

29 with a dense matrix or rank-2 ndarray D 

30 

31 lil_matrix(S) 

32 with another sparse matrix S (equivalent to S.tolil()) 

33 

34 lil_matrix((M, N), [dtype]) 

35 to construct an empty matrix with shape (M, N) 

36 dtype is optional, defaulting to dtype='d'. 

37 

38 Attributes 

39 ---------- 

40 dtype : dtype 

41 Data type of the matrix 

42 shape : 2-tuple 

43 Shape of the matrix 

44 ndim : int 

45 Number of dimensions (this is always 2) 

46 nnz 

47 Number of stored values, including explicit zeros 

48 data 

49 LIL format data array of the matrix 

50 rows 

51 LIL format row index array of the matrix 

52 

53 Notes 

54 ----- 

55 Sparse matrices can be used in arithmetic operations: they support 

56 addition, subtraction, multiplication, division, and matrix power. 

57 

58 Advantages of the LIL format 

59 - supports flexible slicing 

60 - changes to the matrix sparsity structure are efficient 

61 

62 Disadvantages of the LIL format 

63 - arithmetic operations LIL + LIL are slow (consider CSR or CSC) 

64 - slow column slicing (consider CSC) 

65 - slow matrix vector products (consider CSR or CSC) 

66 

67 Intended Usage 

68 - LIL is a convenient format for constructing sparse matrices 

69 - once a matrix has been constructed, convert to CSR or 

70 CSC format for fast arithmetic and matrix vector operations 

71 - consider using the COO format when constructing large matrices 

72 

73 Data Structure 

74 - An array (``self.rows``) of rows, each of which is a sorted 

75 list of column indices of non-zero elements. 

76 - The corresponding nonzero values are stored in similar 

77 fashion in ``self.data``. 

78 

79 

80 """ 

81 format = 'lil' 

82 

83 def __init__(self, arg1, shape=None, dtype=None, copy=False): 

84 spmatrix.__init__(self) 

85 self.dtype = getdtype(dtype, arg1, default=float) 

86 

87 # First get the shape 

88 if isspmatrix(arg1): 

89 if isspmatrix_lil(arg1) and copy: 

90 A = arg1.copy() 

91 else: 

92 A = arg1.tolil() 

93 

94 if dtype is not None: 

95 A = A.astype(dtype, copy=False) 

96 

97 self._shape = check_shape(A.shape) 

98 self.dtype = A.dtype 

99 self.rows = A.rows 

100 self.data = A.data 

101 elif isinstance(arg1,tuple): 

102 if isshape(arg1): 

103 if shape is not None: 

104 raise ValueError('invalid use of shape parameter') 

105 M, N = arg1 

106 self._shape = check_shape((M, N)) 

107 self.rows = np.empty((M,), dtype=object) 

108 self.data = np.empty((M,), dtype=object) 

109 for i in range(M): 

110 self.rows[i] = [] 

111 self.data[i] = [] 

112 else: 

113 raise TypeError('unrecognized lil_matrix constructor usage') 

114 else: 

115 # assume A is dense 

116 try: 

117 A = self._ascontainer(arg1) 

118 except TypeError as e: 

119 raise TypeError('unsupported matrix type') from e 

120 else: 

121 A = self._csr_container(A, dtype=dtype).tolil() 

122 

123 self._shape = check_shape(A.shape) 

124 self.dtype = A.dtype 

125 self.rows = A.rows 

126 self.data = A.data 

127 

128 def __iadd__(self,other): 

129 self[:,:] = self + other 

130 return self 

131 

132 def __isub__(self,other): 

133 self[:,:] = self - other 

134 return self 

135 

136 def __imul__(self,other): 

137 if isscalarlike(other): 

138 self[:,:] = self * other 

139 return self 

140 else: 

141 return NotImplemented 

142 

143 def __itruediv__(self,other): 

144 if isscalarlike(other): 

145 self[:,:] = self / other 

146 return self 

147 else: 

148 return NotImplemented 

149 

150 # Whenever the dimensions change, empty lists should be created for each 

151 # row 

152 

153 def getnnz(self, axis=None): 

154 if axis is None: 

155 return sum([len(rowvals) for rowvals in self.data]) 

156 if axis < 0: 

157 axis += 2 

158 if axis == 0: 

159 out = np.zeros(self.shape[1], dtype=np.intp) 

160 for row in self.rows: 

161 out[row] += 1 

162 return out 

163 elif axis == 1: 

164 return np.array([len(rowvals) for rowvals in self.data], dtype=np.intp) 

165 else: 

166 raise ValueError('axis out of bounds') 

167 

168 def count_nonzero(self): 

169 return sum(np.count_nonzero(rowvals) for rowvals in self.data) 

170 

171 getnnz.__doc__ = spmatrix.getnnz.__doc__ 

172 count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__ 

173 

174 def __str__(self): 

175 val = '' 

176 for i, row in enumerate(self.rows): 

177 for pos, j in enumerate(row): 

178 val += " %s\t%s\n" % (str((i, j)), str(self.data[i][pos])) 

179 return val[:-1] 

180 

181 def getrowview(self, i): 

182 """Returns a view of the 'i'th row (without copying). 

183 """ 

184 new = self._lil_container((1, self.shape[1]), dtype=self.dtype) 

185 new.rows[0] = self.rows[i] 

186 new.data[0] = self.data[i] 

187 return new 

188 

189 def getrow(self, i): 

190 """Returns a copy of the 'i'th row. 

191 """ 

192 M, N = self.shape 

193 if i < 0: 

194 i += M 

195 if i < 0 or i >= M: 

196 raise IndexError('row index out of bounds') 

197 new = self._lil_container((1, N), dtype=self.dtype) 

198 new.rows[0] = self.rows[i][:] 

199 new.data[0] = self.data[i][:] 

200 return new 

201 

202 def __getitem__(self, key): 

203 # Fast path for simple (int, int) indexing. 

204 if (isinstance(key, tuple) and len(key) == 2 and 

205 isinstance(key[0], INT_TYPES) and 

206 isinstance(key[1], INT_TYPES)): 

207 # lil_get1 handles validation for us. 

208 return self._get_intXint(*key) 

209 # Everything else takes the normal path. 

210 return IndexMixin.__getitem__(self, key) 

211 

212 def _asindices(self, idx, N): 

213 # LIL routines handle bounds-checking for us, so don't do it here. 

214 try: 

215 x = np.asarray(idx) 

216 except (ValueError, TypeError, MemoryError) as e: 

217 raise IndexError('invalid index') from e 

218 if x.ndim not in (1, 2): 

219 raise IndexError('Index dimension must be <= 2') 

220 return x 

221 

222 def _get_intXint(self, row, col): 

223 v = _csparsetools.lil_get1(self.shape[0], self.shape[1], self.rows, 

224 self.data, row, col) 

225 return self.dtype.type(v) 

226 

227 def _get_sliceXint(self, row, col): 

228 row = range(*row.indices(self.shape[0])) 

229 return self._get_row_ranges(row, slice(col, col+1)) 

230 

231 def _get_arrayXint(self, row, col): 

232 row = row.squeeze() 

233 return self._get_row_ranges(row, slice(col, col+1)) 

234 

235 def _get_intXslice(self, row, col): 

236 return self._get_row_ranges((row,), col) 

237 

238 def _get_sliceXslice(self, row, col): 

239 row = range(*row.indices(self.shape[0])) 

240 return self._get_row_ranges(row, col) 

241 

242 def _get_arrayXslice(self, row, col): 

243 return self._get_row_ranges(row, col) 

244 

245 def _get_intXarray(self, row, col): 

246 row = np.array(row, dtype=col.dtype, ndmin=1) 

247 return self._get_columnXarray(row, col) 

248 

249 def _get_sliceXarray(self, row, col): 

250 row = np.arange(*row.indices(self.shape[0])) 

251 return self._get_columnXarray(row, col) 

252 

253 def _get_columnXarray(self, row, col): 

254 # outer indexing 

255 row, col = _broadcast_arrays(row[:,None], col) 

256 return self._get_arrayXarray(row, col) 

257 

258 def _get_arrayXarray(self, row, col): 

259 # inner indexing 

260 i, j = map(np.atleast_2d, _prepare_index_for_memoryview(row, col)) 

261 new = self._lil_container(i.shape, dtype=self.dtype) 

262 _csparsetools.lil_fancy_get(self.shape[0], self.shape[1], 

263 self.rows, self.data, 

264 new.rows, new.data, 

265 i, j) 

266 return new 

267 

268 def _get_row_ranges(self, rows, col_slice): 

269 """ 

270 Fast path for indexing in the case where column index is slice. 

271 

272 This gains performance improvement over brute force by more 

273 efficient skipping of zeros, by accessing the elements 

274 column-wise in order. 

275 

276 Parameters 

277 ---------- 

278 rows : sequence or range 

279 Rows indexed. If range, must be within valid bounds. 

280 col_slice : slice 

281 Columns indexed 

282 

283 """ 

284 j_start, j_stop, j_stride = col_slice.indices(self.shape[1]) 

285 col_range = range(j_start, j_stop, j_stride) 

286 nj = len(col_range) 

287 new = self._lil_container((len(rows), nj), dtype=self.dtype) 

288 

289 _csparsetools.lil_get_row_ranges(self.shape[0], self.shape[1], 

290 self.rows, self.data, 

291 new.rows, new.data, 

292 rows, 

293 j_start, j_stop, j_stride, nj) 

294 

295 return new 

296 

297 def _set_intXint(self, row, col, x): 

298 _csparsetools.lil_insert(self.shape[0], self.shape[1], self.rows, 

299 self.data, row, col, x) 

300 

301 def _set_arrayXarray(self, row, col, x): 

302 i, j, x = map(np.atleast_2d, _prepare_index_for_memoryview(row, col, x)) 

303 _csparsetools.lil_fancy_set(self.shape[0], self.shape[1], 

304 self.rows, self.data, 

305 i, j, x) 

306 

307 def _set_arrayXarray_sparse(self, row, col, x): 

308 # Special case: full matrix assignment 

309 if (x.shape == self.shape and 

310 isinstance(row, slice) and row == slice(None) and 

311 isinstance(col, slice) and col == slice(None)): 

312 x = self._lil_container(x, dtype=self.dtype) 

313 self.rows = x.rows 

314 self.data = x.data 

315 return 

316 # Fall back to densifying x 

317 x = np.asarray(x.toarray(), dtype=self.dtype) 

318 x, _ = _broadcast_arrays(x, row) 

319 self._set_arrayXarray(row, col, x) 

320 

321 def __setitem__(self, key, x): 

322 # Fast path for simple (int, int) indexing. 

323 if (isinstance(key, tuple) and len(key) == 2 and 

324 isinstance(key[0], INT_TYPES) and 

325 isinstance(key[1], INT_TYPES)): 

326 x = self.dtype.type(x) 

327 if x.size > 1: 

328 raise ValueError("Trying to assign a sequence to an item") 

329 return self._set_intXint(key[0], key[1], x) 

330 # Everything else takes the normal path. 

331 IndexMixin.__setitem__(self, key, x) 

332 

333 def _mul_scalar(self, other): 

334 if other == 0: 

335 # Multiply by zero: return the zero matrix 

336 new = self._lil_container(self.shape, dtype=self.dtype) 

337 else: 

338 res_dtype = upcast_scalar(self.dtype, other) 

339 

340 new = self.copy() 

341 new = new.astype(res_dtype) 

342 # Multiply this scalar by every element. 

343 for j, rowvals in enumerate(new.data): 

344 new.data[j] = [val*other for val in rowvals] 

345 return new 

346 

347 def __truediv__(self, other): # self / other 

348 if isscalarlike(other): 

349 new = self.copy() 

350 # Divide every element by this scalar 

351 for j, rowvals in enumerate(new.data): 

352 new.data[j] = [val/other for val in rowvals] 

353 return new 

354 else: 

355 return self.tocsr() / other 

356 

357 def copy(self): 

358 M, N = self.shape 

359 new = self._lil_container(self.shape, dtype=self.dtype) 

360 # This is ~14x faster than calling deepcopy() on rows and data. 

361 _csparsetools.lil_get_row_ranges(M, N, self.rows, self.data, 

362 new.rows, new.data, range(M), 

363 0, N, 1, N) 

364 return new 

365 

366 copy.__doc__ = spmatrix.copy.__doc__ 

367 

368 def reshape(self, *args, **kwargs): 

369 shape = check_shape(args, self.shape) 

370 order, copy = check_reshape_kwargs(kwargs) 

371 

372 # Return early if reshape is not required 

373 if shape == self.shape: 

374 if copy: 

375 return self.copy() 

376 else: 

377 return self 

378 

379 new = self._lil_container(shape, dtype=self.dtype) 

380 

381 if order == 'C': 

382 ncols = self.shape[1] 

383 for i, row in enumerate(self.rows): 

384 for col, j in enumerate(row): 

385 new_r, new_c = np.unravel_index(i * ncols + j, shape) 

386 new[new_r, new_c] = self[i, j] 

387 elif order == 'F': 

388 nrows = self.shape[0] 

389 for i, row in enumerate(self.rows): 

390 for col, j in enumerate(row): 

391 new_r, new_c = np.unravel_index(i + j * nrows, shape, order) 

392 new[new_r, new_c] = self[i, j] 

393 else: 

394 raise ValueError("'order' must be 'C' or 'F'") 

395 

396 return new 

397 

398 reshape.__doc__ = spmatrix.reshape.__doc__ 

399 

400 def resize(self, *shape): 

401 shape = check_shape(shape) 

402 new_M, new_N = shape 

403 M, N = self.shape 

404 

405 if new_M < M: 

406 self.rows = self.rows[:new_M] 

407 self.data = self.data[:new_M] 

408 elif new_M > M: 

409 self.rows = np.resize(self.rows, new_M) 

410 self.data = np.resize(self.data, new_M) 

411 for i in range(M, new_M): 

412 self.rows[i] = [] 

413 self.data[i] = [] 

414 

415 if new_N < N: 

416 for row, data in zip(self.rows, self.data): 

417 trunc = bisect_left(row, new_N) 

418 del row[trunc:] 

419 del data[trunc:] 

420 

421 self._shape = shape 

422 

423 resize.__doc__ = spmatrix.resize.__doc__ 

424 

425 def toarray(self, order=None, out=None): 

426 d = self._process_toarray_args(order, out) 

427 for i, row in enumerate(self.rows): 

428 for pos, j in enumerate(row): 

429 d[i, j] = self.data[i][pos] 

430 return d 

431 

432 toarray.__doc__ = spmatrix.toarray.__doc__ 

433 

434 def transpose(self, axes=None, copy=False): 

435 return self.tocsr(copy=copy).transpose(axes=axes, copy=False).tolil(copy=False) 

436 

437 transpose.__doc__ = spmatrix.transpose.__doc__ 

438 

439 def tolil(self, copy=False): 

440 if copy: 

441 return self.copy() 

442 else: 

443 return self 

444 

445 tolil.__doc__ = spmatrix.tolil.__doc__ 

446 

447 def tocsr(self, copy=False): 

448 M, N = self.shape 

449 if M == 0 or N == 0: 

450 return self._csr_container((M, N), dtype=self.dtype) 

451 

452 # construct indptr array 

453 if M*N <= np.iinfo(np.int32).max: 

454 # fast path: it is known that 64-bit indexing will not be needed. 

455 idx_dtype = np.int32 

456 indptr = np.empty(M + 1, dtype=idx_dtype) 

457 indptr[0] = 0 

458 _csparsetools.lil_get_lengths(self.rows, indptr[1:]) 

459 np.cumsum(indptr, out=indptr) 

460 nnz = indptr[-1] 

461 else: 

462 idx_dtype = get_index_dtype(maxval=N) 

463 lengths = np.empty(M, dtype=idx_dtype) 

464 _csparsetools.lil_get_lengths(self.rows, lengths) 

465 nnz = lengths.sum(dtype=np.int64) 

466 idx_dtype = get_index_dtype(maxval=max(N, nnz)) 

467 indptr = np.empty(M + 1, dtype=idx_dtype) 

468 indptr[0] = 0 

469 np.cumsum(lengths, dtype=idx_dtype, out=indptr[1:]) 

470 

471 indices = np.empty(nnz, dtype=idx_dtype) 

472 data = np.empty(nnz, dtype=self.dtype) 

473 _csparsetools.lil_flatten_to_array(self.rows, indices) 

474 _csparsetools.lil_flatten_to_array(self.data, data) 

475 

476 # init csr matrix 

477 return self._csr_container((data, indices, indptr), shape=self.shape) 

478 

479 tocsr.__doc__ = spmatrix.tocsr.__doc__ 

480 

481 

482def _prepare_index_for_memoryview(i, j, x=None): 

483 """ 

484 Convert index and data arrays to form suitable for passing to the 

485 Cython fancy getset routines. 

486 

487 The conversions are necessary since to (i) ensure the integer 

488 index arrays are in one of the accepted types, and (ii) to ensure 

489 the arrays are writable so that Cython memoryview support doesn't 

490 choke on them. 

491 

492 Parameters 

493 ---------- 

494 i, j 

495 Index arrays 

496 x : optional 

497 Data arrays 

498 

499 Returns 

500 ------- 

501 i, j, x 

502 Re-formatted arrays (x is omitted, if input was None) 

503 

504 """ 

505 if i.dtype > j.dtype: 

506 j = j.astype(i.dtype) 

507 elif i.dtype < j.dtype: 

508 i = i.astype(j.dtype) 

509 

510 if not i.flags.writeable or i.dtype not in (np.int32, np.int64): 

511 i = i.astype(np.intp) 

512 if not j.flags.writeable or j.dtype not in (np.int32, np.int64): 

513 j = j.astype(np.intp) 

514 

515 if x is not None: 

516 if not x.flags.writeable: 

517 x = x.copy() 

518 return i, j, x 

519 else: 

520 return i, j 

521 

522 

523def isspmatrix_lil(x): 

524 """Is x of lil_matrix type? 

525 

526 Parameters 

527 ---------- 

528 x 

529 object to check for being a lil matrix 

530 

531 Returns 

532 ------- 

533 bool 

534 True if x is a lil matrix, False otherwise 

535 

536 Examples 

537 -------- 

538 >>> from scipy.sparse import lil_matrix, isspmatrix_lil 

539 >>> isspmatrix_lil(lil_matrix([[5]])) 

540 True 

541 

542 >>> from scipy.sparse import lil_matrix, csr_matrix, isspmatrix_lil 

543 >>> isspmatrix_lil(csr_matrix([[5]])) 

544 False 

545 """ 

546 from ._arrays import lil_array 

547 return isinstance(x, lil_matrix) or isinstance(x, lil_array)