Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/

1"""Compressed Sparse Row matrix format"""

3__docformat__ = "restructuredtext en"

5__all__ = ['csr_array', 'csr_matrix', 'isspmatrix_csr']

7import numpy as np

9from ._matrix import spmatrix

10from ._base import _spbase, sparray

11from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks,

12 get_csr_submatrix)

13from ._sputils import upcast

15from ._compressed import _cs_matrix

18class _csr_base(_cs_matrix):

19 _format = 'csr'

21 def transpose(self, axes=None, copy=False):

22 if axes is not None and axes != (1, 0):

23 raise ValueError("Sparse arrays/matrices do not support "

24 "an 'axes' parameter because swapping "

25 "dimensions is the only logical permutation.")

27 M, N = self.shape

28 return self._csc_container((self.data, self.indices,

29 self.indptr), shape=(N, M), copy=copy)

31 transpose.__doc__ = _spbase.transpose.__doc__

33 def tolil(self, copy=False):

34 lil = self._lil_container(self.shape, dtype=self.dtype)

36 self.sum_duplicates()

37 ptr,ind,dat = self.indptr,self.indices,self.data

38 rows, data = lil.rows, lil.data

40 for n in range(self.shape[0]):

41 start = ptr[n]

42 end = ptr[n+1]

43 rows[n] = ind[start:end].tolist()

44 data[n] = dat[start:end].tolist()

46 return lil

48 tolil.__doc__ = _spbase.tolil.__doc__

50 def tocsr(self, copy=False):

51 if copy:

52 return self.copy()

53 else:

54 return self

56 tocsr.__doc__ = _spbase.tocsr.__doc__

58 def tocsc(self, copy=False):

59 idx_dtype = self._get_index_dtype((self.indptr, self.indices),

60 maxval=max(self.nnz, self.shape[0]))

61 indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype)

62 indices = np.empty(self.nnz, dtype=idx_dtype)

63 data = np.empty(self.nnz, dtype=upcast(self.dtype))

65 csr_tocsc(self.shape[0], self.shape[1],

66 self.indptr.astype(idx_dtype),

67 self.indices.astype(idx_dtype),

68 self.data,

69 indptr,

70 indices,

71 data)

73 A = self._csc_container((data, indices, indptr), shape=self.shape)

74 A.has_sorted_indices = True

75 return A

77 tocsc.__doc__ = _spbase.tocsc.__doc__

79 def tobsr(self, blocksize=None, copy=True):

80 if blocksize is None:

81 from ._spfuncs import estimate_blocksize

82 return self.tobsr(blocksize=estimate_blocksize(self))

84 elif blocksize == (1,1):

85 arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr)

86 return self._bsr_container(arg1, shape=self.shape, copy=copy)

88 else:

89 R,C = blocksize

90 M,N = self.shape

92 if R < 1 or C < 1 or M % R != 0 or N % C != 0:

93 raise ValueError('invalid blocksize %s' % blocksize)

95 blks = csr_count_blocks(M,N,R,C,self.indptr,self.indices)

97 idx_dtype = self._get_index_dtype((self.indptr, self.indices),

98 maxval=max(N//C, blks))

99 indptr = np.empty(M//R+1, dtype=idx_dtype)

100 indices = np.empty(blks, dtype=idx_dtype)

101 data = np.zeros((blks,R,C), dtype=self.dtype)

102

103 csr_tobsr(M, N, R, C,

104 self.indptr.astype(idx_dtype),

105 self.indices.astype(idx_dtype),

106 self.data,

107 indptr, indices, data.ravel())

108

109 return self._bsr_container(

110 (data, indices, indptr), shape=self.shape

111 )

112

113 tobsr.__doc__ = _spbase.tobsr.__doc__

114

115 # these functions are used by the parent class (_cs_matrix)

116 # to remove redundancy between csc_matrix and csr_array

117 @staticmethod

118 def _swap(x):

119 """swap the members of x if this is a column-oriented matrix

120 """

121 return x

122

123 def __iter__(self):

124 indptr = np.zeros(2, dtype=self.indptr.dtype)

125 shape = (1, self.shape[1])

126 i0 = 0

127 for i1 in self.indptr[1:]:

128 indptr[1] = i1 - i0

129 indices = self.indices[i0:i1]

130 data = self.data[i0:i1]

131 yield self.__class__(

132 (data, indices, indptr), shape=shape, copy=True

133 )

134 i0 = i1

135

136 def _getrow(self, i):

137 """Returns a copy of row i of the matrix, as a (1 x n)

138 CSR matrix (row vector).

139 """

140 M, N = self.shape

141 i = int(i)

142 if i < 0:

143 i += M

144 if i < 0 or i >= M:

145 raise IndexError('index (%d) out of range' % i)

146 indptr, indices, data = get_csr_submatrix(

147 M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N)

148 return self.__class__((data, indices, indptr), shape=(1, N),

149 dtype=self.dtype, copy=False)

150

151 def _getcol(self, i):

152 """Returns a copy of column i of the matrix, as a (m x 1)

153 CSR matrix (column vector).

154 """

155 M, N = self.shape

156 i = int(i)

157 if i < 0:

158 i += N

159 if i < 0 or i >= N:

160 raise IndexError('index (%d) out of range' % i)

161 indptr, indices, data = get_csr_submatrix(

162 M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1)

163 return self.__class__((data, indices, indptr), shape=(M, 1),

164 dtype=self.dtype, copy=False)

165

166 def _get_intXarray(self, row, col):

167 return self._getrow(row)._minor_index_fancy(col)

168

169 def _get_intXslice(self, row, col):

170 if col.step in (1, None):

171 return self._get_submatrix(row, col, copy=True)

172 # TODO: uncomment this once it's faster:

173 # return self._getrow(row)._minor_slice(col)

174

175 M, N = self.shape

176 start, stop, stride = col.indices(N)

177

178 ii, jj = self.indptr[row:row+2]

179 row_indices = self.indices[ii:jj]

180 row_data = self.data[ii:jj]

181

182 if stride > 0:

183 ind = (row_indices >= start) & (row_indices < stop)

184 else:

185 ind = (row_indices <= start) & (row_indices > stop)

186

187 if abs(stride) > 1:

188 ind &= (row_indices - start) % stride == 0

189

190 row_indices = (row_indices[ind] - start) // stride

191 row_data = row_data[ind]

192 row_indptr = np.array([0, len(row_indices)])

193

194 if stride < 0:

195 row_data = row_data[::-1]

196 row_indices = abs(row_indices[::-1])

197

198 shape = (1, max(0, int(np.ceil(float(stop - start) / stride))))

199 return self.__class__((row_data, row_indices, row_indptr), shape=shape,

200 dtype=self.dtype, copy=False)

201

202 def _get_sliceXint(self, row, col):

203 if row.step in (1, None):

204 return self._get_submatrix(row, col, copy=True)

205 return self._major_slice(row)._get_submatrix(minor=col)

206

207 def _get_sliceXarray(self, row, col):

208 return self._major_slice(row)._minor_index_fancy(col)

209

210 def _get_arrayXint(self, row, col):

211 return self._major_index_fancy(row)._get_submatrix(minor=col)

212

213 def _get_arrayXslice(self, row, col):

214 if col.step not in (1, None):

215 col = np.arange(*col.indices(self.shape[1]))

216 return self._get_arrayXarray(row, col)

217 return self._major_index_fancy(row)._get_submatrix(minor=col)

218

219

220def isspmatrix_csr(x):

221 """Is `x` of csr_matrix type?

222

223 Parameters

224 ----------

225 x

226 object to check for being a csr matrix

227

228 Returns

229 -------

230 bool

231 True if `x` is a csr matrix, False otherwise

232

233 Examples

234 --------

235 >>> from scipy.sparse import csr_array, csr_matrix, coo_matrix, isspmatrix_csr

236 >>> isspmatrix_csr(csr_matrix([[5]]))

237 True

238 >>> isspmatrix_csr(csr_array([[5]]))

239 False

240 >>> isspmatrix_csr(coo_matrix([[5]]))

241 False

242 """

243 return isinstance(x, csr_matrix)

244

245

246# This namespace class separates array from matrix with isinstance

class csr_array(_csr_base, sparray):
    """
    Compressed Sparse Row array.

    This can be instantiated in several ways:
        csr_array(D)
            where D is a 2-D ndarray

        csr_array(S)
            with another sparse array or matrix S (equivalent to S.tocsr())

        csr_array((M, N), [dtype])
            to construct an empty array with shape (M, N)
            dtype is optional, defaulting to dtype='d'.

        csr_array((data, (row_ind, col_ind)), [shape=(M, N)])
            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.

        csr_array((data, indices, indptr), [shape=(M, N)])
            is the standard CSR representation where the column indices for
            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
            If the shape parameter is not supplied, the array dimensions
            are inferred from the index arrays.

    Attributes
    ----------
    dtype : dtype
        Data type of the array
    shape : 2-tuple
        Shape of the array
    ndim : int
        Number of dimensions (this is always 2)
    nnz
    size
    data
        CSR format data array of the array
    indices
        CSR format index array of the array
    indptr
        CSR format index pointer array of the array
    has_sorted_indices
    has_canonical_format
    T

    Notes
    -----

    Sparse arrays can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Advantages of the CSR format
      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
      - efficient row slicing
      - fast matrix vector products

    Disadvantages of the CSR format
      - slow column slicing operations (consider CSC)
      - changes to the sparsity structure are expensive (consider LIL or DOK)

    Canonical Format
        - Within each row, indices are sorted by column.
        - There are no duplicate entries.

    Examples
    --------

    >>> import numpy as np
    >>> from scipy.sparse import csr_array
    >>> csr_array((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    >>> row = np.array([0, 0, 1, 2, 2, 2])
    >>> col = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    >>> indptr = np.array([0, 2, 3, 6])
    >>> indices = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_array((data, indices, indptr), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Duplicate entries are summed together:

    >>> row = np.array([0, 1, 2, 0])
    >>> col = np.array([0, 1, 1, 0])
    >>> data = np.array([1, 2, 4, 8])
    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
    array([[9, 0, 0],
           [0, 2, 0],
           [0, 4, 0]])

    As an example of how to construct a CSR array incrementally,
    the following snippet builds a term-document array from texts:

    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
    >>> indptr = [0]
    >>> indices = []
    >>> data = []
    >>> vocabulary = {}
    >>> for d in docs:
    ...     for term in d:
    ...         index = vocabulary.setdefault(term, len(vocabulary))
    ...         indices.append(index)
    ...         data.append(1)
    ...     indptr.append(len(indices))
    ...
    >>> csr_array((data, indices, indptr), dtype=int).toarray()
    array([[2, 1, 0, 0],
           [0, 1, 1, 1]])

    """
    # No members are defined in this class body: all attributes and methods
    # come from the bases, ``_csr_base`` and ``sparray``.

368

369

class csr_matrix(spmatrix, _csr_base):
    """
    Compressed Sparse Row matrix.

    This can be instantiated in several ways:
        csr_matrix(D)
            where D is a 2-D ndarray

        csr_matrix(S)
            with another sparse array or matrix S (equivalent to S.tocsr())

        csr_matrix((M, N), [dtype])
            to construct an empty matrix with shape (M, N)
            dtype is optional, defaulting to dtype='d'.

        csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.

        csr_matrix((data, indices, indptr), [shape=(M, N)])
            is the standard CSR representation where the column indices for
            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
            If the shape parameter is not supplied, the matrix dimensions
            are inferred from the index arrays.

    Attributes
    ----------
    dtype : dtype
        Data type of the matrix
    shape : 2-tuple
        Shape of the matrix
    ndim : int
        Number of dimensions (this is always 2)
    nnz
    size
    data
        CSR format data array of the matrix
    indices
        CSR format index array of the matrix
    indptr
        CSR format index pointer array of the matrix
    has_sorted_indices
    has_canonical_format
    T

    Notes
    -----

    Sparse matrices can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Advantages of the CSR format
      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
      - efficient row slicing
      - fast matrix vector products

    Disadvantages of the CSR format
      - slow column slicing operations (consider CSC)
      - changes to the sparsity structure are expensive (consider LIL or DOK)

    Canonical Format
        - Within each row, indices are sorted by column.
        - There are no duplicate entries.

    Examples
    --------

    >>> import numpy as np
    >>> from scipy.sparse import csr_matrix
    >>> csr_matrix((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    >>> row = np.array([0, 0, 1, 2, 2, 2])
    >>> col = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    >>> indptr = np.array([0, 2, 3, 6])
    >>> indices = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Duplicate entries are summed together:

    >>> row = np.array([0, 1, 2, 0])
    >>> col = np.array([0, 1, 1, 0])
    >>> data = np.array([1, 2, 4, 8])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[9, 0, 0],
           [0, 2, 0],
           [0, 4, 0]])

    As an example of how to construct a CSR matrix incrementally,
    the following snippet builds a term-document matrix from texts:

    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
    >>> indptr = [0]
    >>> indices = []
    >>> data = []
    >>> vocabulary = {}
    >>> for d in docs:
    ...     for term in d:
    ...         index = vocabulary.setdefault(term, len(vocabulary))
    ...         indices.append(index)
    ...         data.append(1)
    ...     indptr.append(len(indices))
    ...
    >>> csr_matrix((data, indices, indptr), dtype=int).toarray()
    array([[2, 1, 0, 0],
           [0, 1, 1, 1]])

    """
    # No members are defined in this class body: all attributes and methods
    # come from the bases, ``spmatrix`` and ``_csr_base``.

491

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/_csr.py: 26%

133 statements