Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/

1"""Compressed Sparse Row matrix format"""

3__docformat__ = "restructuredtext en"

5__all__ = ['csr_array', 'csr_matrix', 'isspmatrix_csr']

7import numpy as np

9from ._matrix import spmatrix

10from ._base import _spbase, sparray

11from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks,

12 get_csr_submatrix)

13from ._sputils import upcast

15from ._compressed import _cs_matrix

class _csr_base(_cs_matrix):
    """Format-specific behaviour shared by the CSR array and matrix classes.

    Implements conversions to other sparse formats, row iteration, and the
    row/column extraction and indexing helpers for the 'csr' format on top
    of ``_cs_matrix``.
    """
    _format = 'csr'

    def transpose(self, axes=None, copy=False):
        # The docstring is taken from _spbase.transpose (assigned below).
        if axes is not None and axes != (1, 0):
            raise ValueError("Sparse arrays/matrices do not support "
                             "an 'axes' parameter because swapping "
                             "dimensions is the only logical permutation.")

        M, N = self.shape
        # The same (data, indices, indptr) triple is handed to the CSC
        # container with the shape swapped, so no element is moved here.
        return self._csc_container((self.data, self.indices,
                                    self.indptr), shape=(N, M), copy=copy)

    transpose.__doc__ = _spbase.transpose.__doc__

    def tolil(self, copy=False):
        # The docstring is taken from _spbase.tolil (assigned below).
        # ``copy`` is accepted for interface compatibility but is not read:
        # the LIL container built here always gets fresh Python lists.
        lil = self._lil_container(self.shape, dtype=self.dtype)

        # NOTE: this is called on ``self`` (not on a copy) before the
        # per-row lists are extracted.
        self.sum_duplicates()
        ptr, ind, dat = self.indptr, self.indices, self.data
        rows, data = lil.rows, lil.data

        # Row n occupies the half-open range ptr[n]:ptr[n+1] of ind/dat.
        for n in range(self.shape[0]):
            start = ptr[n]
            end = ptr[n+1]
            rows[n] = ind[start:end].tolist()
            data[n] = dat[start:end].tolist()

        return lil

    tolil.__doc__ = _spbase.tolil.__doc__

    def tocsr(self, copy=False):
        # The docstring is taken from _spbase.tocsr (assigned below).
        # Already CSR: return self, or a copy when one is requested.
        if copy:
            return self.copy()
        else:
            return self

    tocsr.__doc__ = _spbase.tocsr.__doc__

    def tocsc(self, copy=False):
        # The docstring is taken from _spbase.tocsc (assigned below).
        # ``copy`` is not read: new output arrays are always allocated.
        idx_dtype = self._get_index_dtype((self.indptr, self.indices),
                                          maxval=max(self.nnz, self.shape[0]))
        # Output buffers, filled in place by csr_tocsc.
        indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype)
        indices = np.empty(self.nnz, dtype=idx_dtype)
        data = np.empty(self.nnz, dtype=upcast(self.dtype))

        csr_tocsc(self.shape[0], self.shape[1],
                  self.indptr.astype(idx_dtype),
                  self.indices.astype(idx_dtype),
                  self.data,
                  indptr,
                  indices,
                  data)

        A = self._csc_container((data, indices, indptr), shape=self.shape)
        A.has_sorted_indices = True
        return A

    tocsc.__doc__ = _spbase.tocsc.__doc__

    def tobsr(self, blocksize=None, copy=True):
        # The docstring is taken from _spbase.tobsr (assigned below).
        if blocksize is None:
            # Imported lazily, inside the branch that needs it.
            from ._spfuncs import estimate_blocksize
            # NOTE(review): ``copy`` is not forwarded to the recursive call,
            # which therefore runs with the default ``copy=True``.
            return self.tobsr(blocksize=estimate_blocksize(self))

        elif blocksize == (1,1):
            # 1x1 blocks: the index arrays are passed through unchanged and
            # only ``data`` is reshaped to (nnz, 1, 1).
            arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr)
            return self._bsr_container(arg1, shape=self.shape, copy=copy)

        else:
            R,C = blocksize
            M,N = self.shape

            if R < 1 or C < 1 or M % R != 0 or N % C != 0:
                # ``blocksize`` is wrapped in a 1-tuple: a bare 2-tuple on
                # the right of ``%`` is treated as two format arguments and
                # raises TypeError instead of the intended ValueError.
                raise ValueError('invalid blocksize %s' % (blocksize,))

            blks = csr_count_blocks(M,N,R,C,self.indptr,self.indices)

            idx_dtype = self._get_index_dtype((self.indptr, self.indices),
                                              maxval=max(N//C, blks))
            indptr = np.empty(M//R+1, dtype=idx_dtype)
            indices = np.empty(blks, dtype=idx_dtype)
            # Zero-initialised so entries not written by csr_tobsr stay 0.
            data = np.zeros((blks,R,C), dtype=self.dtype)

            csr_tobsr(M, N, R, C,
                      self.indptr.astype(idx_dtype),
                      self.indices.astype(idx_dtype),
                      self.data,
                      indptr, indices, data.ravel())

            return self._bsr_container(
                (data, indices, indptr), shape=self.shape
            )

    tobsr.__doc__ = _spbase.tobsr.__doc__

    # these functions are used by the parent class (_cs_matrix)
    # to remove redundancy between the CSC and CSR implementations
    def _swap(self, x):
        """swap the members of x if this is a column-oriented matrix

        CSR is row-oriented, so ``x`` is returned unchanged.
        """
        return x

    def __iter__(self):
        """Yield each row in turn as a new (1 x n) object of this class."""
        # A single two-element indptr buffer is reused for every row; each
        # yielded object is constructed with copy=True.
        indptr = np.zeros(2, dtype=self.indptr.dtype)
        shape = (1, self.shape[1])
        i0 = 0
        for i1 in self.indptr[1:]:
            # The row holds i1 - i0 stored entries, located at i0:i1.
            indptr[1] = i1 - i0
            indices = self.indices[i0:i1]
            data = self.data[i0:i1]
            yield self.__class__(
                (data, indices, indptr), shape=shape, copy=True
            )
            i0 = i1

    def _getrow(self, i):
        """Returns a copy of row i of the matrix, as a (1 x n)
        CSR matrix (row vector).

        Negative ``i`` counts from the last row. Raises ``IndexError`` when
        ``i`` is outside ``[-M, M)``.
        """
        M, N = self.shape
        i = int(i)
        if i < 0:
            i += M
        if i < 0 or i >= M:
            raise IndexError('index (%d) out of range' % i)
        # Rows i..i+1, all columns 0..N.
        indptr, indices, data = get_csr_submatrix(
            M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N)
        return self.__class__((data, indices, indptr), shape=(1, N),
                              dtype=self.dtype, copy=False)

    def _getcol(self, i):
        """Returns a copy of column i of the matrix, as a (m x 1)
        CSR matrix (column vector).

        Negative ``i`` counts from the last column. Raises ``IndexError``
        when ``i`` is outside ``[-N, N)``.
        """
        M, N = self.shape
        i = int(i)
        if i < 0:
            i += N
        if i < 0 or i >= N:
            raise IndexError('index (%d) out of range' % i)
        # All rows 0..M, columns i..i+1.
        indptr, indices, data = get_csr_submatrix(
            M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1)
        return self.__class__((data, indices, indptr), shape=(M, 1),
                              dtype=self.dtype, copy=False)

    def _get_intXarray(self, row, col):
        # Extract the row first, then fancy-index its columns.
        return self._getrow(row)._minor_index_fancy(col)

    def _get_intXslice(self, row, col):
        # Unit-step column slices are delegated to _get_submatrix.
        if col.step in (1, None):
            return self._get_submatrix(row, col, copy=True)
        # TODO: uncomment this once it's faster:
        # return self._getrow(row)._minor_slice(col)

        M, N = self.shape
        start, stop, stride = col.indices(N)

        # Stored column indices and values of the requested row.
        ii, jj = self.indptr[row:row+2]
        row_indices = self.indices[ii:jj]
        row_data = self.data[ii:jj]

        # Keep only the entries whose column lies inside the slice bounds;
        # the inequalities flip when walking backwards.
        if stride > 0:
            ind = (row_indices >= start) & (row_indices < stop)
        else:
            ind = (row_indices <= start) & (row_indices > stop)

        # ...and, for non-unit strides, only those the stride lands on.
        if abs(stride) > 1:
            ind &= (row_indices - start) % stride == 0

        # Map the surviving column indices to positions in the result.
        row_indices = (row_indices[ind] - start) // stride
        row_data = row_data[ind]
        row_indptr = np.array([0, len(row_indices)])

        if stride < 0:
            # Reverse the order of the entries for a backwards slice.
            row_data = row_data[::-1]
            row_indices = abs(row_indices[::-1])

        shape = (1, max(0, int(np.ceil(float(stop - start) / stride))))
        return self.__class__((row_data, row_indices, row_indptr), shape=shape,
                              dtype=self.dtype, copy=False)

    def _get_sliceXint(self, row, col):
        # Unit-step row slices are delegated to _get_submatrix.
        if row.step in (1, None):
            return self._get_submatrix(row, col, copy=True)
        return self._major_slice(row)._get_submatrix(minor=col)

    def _get_sliceXarray(self, row, col):
        # Slice the rows first, then fancy-index the columns.
        return self._major_slice(row)._minor_index_fancy(col)

    def _get_arrayXint(self, row, col):
        # Fancy-index the rows first, then take the single column.
        return self._major_index_fancy(row)._get_submatrix(minor=col)

    def _get_arrayXslice(self, row, col):
        if col.step not in (1, None):
            # Non-unit step: expand the slice into explicit column indices
            # and use the array/array path.
            col = np.arange(*col.indices(self.shape[1]))
            return self._get_arrayXarray(row, col)
        return self._major_index_fancy(row)._get_submatrix(minor=col)

217

218

def isspmatrix_csr(x):
    """Is `x` of csr_matrix type?

    Parameters
    ----------
    x
        object to check for being a csr matrix

    Returns
    -------
    bool
        True if `x` is a csr matrix, False otherwise

    Notes
    -----
    The check is ``isinstance(x, csr_matrix)``; as the examples show, a
    ``csr_array`` is reported as False.

    Examples
    --------
    >>> from scipy.sparse import csr_array, csr_matrix, coo_matrix, isspmatrix_csr
    >>> isspmatrix_csr(csr_matrix([[5]]))
    True
    >>> isspmatrix_csr(csr_array([[5]]))
    False
    >>> isspmatrix_csr(coo_matrix([[5]]))
    False
    """
    return isinstance(x, csr_matrix)

243

244

# This namespace class separates array from matrix with isinstance
class csr_array(_csr_base, sparray):
    """
    Compressed Sparse Row array.

    This can be instantiated in several ways:
        csr_array(D)
            where D is a 2-D ndarray

        csr_array(S)
            with another sparse array or matrix S (equivalent to S.tocsr())

        csr_array((M, N), [dtype])
            to construct an empty array with shape (M, N)
            dtype is optional, defaulting to dtype='d'.

        csr_array((data, (row_ind, col_ind)), [shape=(M, N)])
            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.

        csr_array((data, indices, indptr), [shape=(M, N)])
            is the standard CSR representation where the column indices for
            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
            If the shape parameter is not supplied, the array dimensions
            are inferred from the index arrays.

    Attributes
    ----------
    dtype : dtype
        Data type of the array
    shape : 2-tuple
        Shape of the array
    ndim : int
        Number of dimensions (this is always 2)
    nnz
    size
    data
        CSR format data array of the array
    indices
        CSR format index array of the array
    indptr
        CSR format index pointer array of the array
    has_sorted_indices
    has_canonical_format
    T

    Notes
    -----

    Sparse arrays can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Advantages of the CSR format
      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
      - efficient row slicing
      - fast matrix vector products

    Disadvantages of the CSR format
      - slow column slicing operations (consider CSC)
      - changes to the sparsity structure are expensive (consider LIL or DOK)

    Canonical Format
      - Within each row, indices are sorted by column.
      - There are no duplicate entries.

    Examples
    --------

    >>> import numpy as np
    >>> from scipy.sparse import csr_array
    >>> csr_array((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    >>> row = np.array([0, 0, 1, 2, 2, 2])
    >>> col = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    >>> indptr = np.array([0, 2, 3, 6])
    >>> indices = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_array((data, indices, indptr), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Duplicate entries are summed together:

    >>> row = np.array([0, 1, 2, 0])
    >>> col = np.array([0, 1, 1, 0])
    >>> data = np.array([1, 2, 4, 8])
    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
    array([[9, 0, 0],
           [0, 2, 0],
           [0, 4, 0]])

    As an example of how to construct a CSR array incrementally,
    the following snippet builds a term-document array from texts:

    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
    >>> indptr = [0]
    >>> indices = []
    >>> data = []
    >>> vocabulary = {}
    >>> for d in docs:
    ...     for term in d:
    ...         index = vocabulary.setdefault(term, len(vocabulary))
    ...         indices.append(index)
    ...         data.append(1)
    ...     indptr.append(len(indices))
    ...
    >>> csr_array((data, indices, indptr), dtype=int).toarray()
    array([[2, 1, 0, 0],
           [0, 1, 1, 1]])

    """

367

368

class csr_matrix(spmatrix, _csr_base):
    """
    Compressed Sparse Row matrix.

    This can be instantiated in several ways:
        csr_matrix(D)
            where D is a 2-D ndarray

        csr_matrix(S)
            with another sparse array or matrix S (equivalent to S.tocsr())

        csr_matrix((M, N), [dtype])
            to construct an empty matrix with shape (M, N)
            dtype is optional, defaulting to dtype='d'.

        csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.

        csr_matrix((data, indices, indptr), [shape=(M, N)])
            is the standard CSR representation where the column indices for
            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
            If the shape parameter is not supplied, the matrix dimensions
            are inferred from the index arrays.

    Attributes
    ----------
    dtype : dtype
        Data type of the matrix
    shape : 2-tuple
        Shape of the matrix
    ndim : int
        Number of dimensions (this is always 2)
    nnz
    size
    data
        CSR format data array of the matrix
    indices
        CSR format index array of the matrix
    indptr
        CSR format index pointer array of the matrix
    has_sorted_indices
    has_canonical_format
    T

    Notes
    -----

    Sparse matrices can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Advantages of the CSR format
      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
      - efficient row slicing
      - fast matrix vector products

    Disadvantages of the CSR format
      - slow column slicing operations (consider CSC)
      - changes to the sparsity structure are expensive (consider LIL or DOK)

    Canonical Format
      - Within each row, indices are sorted by column.
      - There are no duplicate entries.

    Examples
    --------

    >>> import numpy as np
    >>> from scipy.sparse import csr_matrix
    >>> csr_matrix((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    >>> row = np.array([0, 0, 1, 2, 2, 2])
    >>> col = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    >>> indptr = np.array([0, 2, 3, 6])
    >>> indices = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Duplicate entries are summed together:

    >>> row = np.array([0, 1, 2, 0])
    >>> col = np.array([0, 1, 1, 0])
    >>> data = np.array([1, 2, 4, 8])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[9, 0, 0],
           [0, 2, 0],
           [0, 4, 0]])

    As an example of how to construct a CSR matrix incrementally,
    the following snippet builds a term-document matrix from texts:

    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
    >>> indptr = [0]
    >>> indices = []
    >>> data = []
    >>> vocabulary = {}
    >>> for d in docs:
    ...     for term in d:
    ...         index = vocabulary.setdefault(term, len(vocabulary))
    ...         indices.append(index)
    ...         data.append(1)
    ...     indptr.append(len(indices))
    ...
    >>> csr_matrix((data, indices, indptr), dtype=int).toarray()
    array([[2, 1, 0, 0],
           [0, 1, 1, 1]])

    """

490

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/_csr.py: 25%

132 statements