Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/_sputils.py: 16%

202 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-14 06:37 +0000

1""" Utility functions for sparse matrix module 

2""" 

3 

4import sys 

5from typing import Any, Literal, Optional, Union 

6import operator 

7import numpy as np 

8from math import prod 

9import scipy.sparse as sp 

10from scipy._lib._util import np_long, np_ulong 

11 

12 

# Names exported by ``from scipy.sparse._sputils import *``.
__all__ = [
    'upcast',
    'getdtype',
    'getdata',
    'isscalarlike',
    'isintlike',
    'isshape',
    'issequence',
    'isdense',
    'ismatrix',
    'get_sum_dtype',
]

# Dtypes a sparse container may hold.  The order matters: `upcast` walks this
# list front to back and returns the first entry the promoted dtype can be
# cast to.
supported_dtypes = [
    # boolean and integer types
    np.bool_,
    np.byte,
    np.ubyte,
    np.short,
    np.ushort,
    np.intc,
    np.uintc,
    np_long,
    np_ulong,
    np.longlong,
    np.ulonglong,
    # real floating-point types
    np.float32,
    np.float64,
    np.longdouble,
    # complex floating-point types
    np.complex64,
    np.complex128,
    np.clongdouble,
]

# Memo shared by `upcast` and `upcast_char`.
_upcast_memo = {}

22 

23 

def upcast(*args):
    """Return the nearest supported sparse dtype for a combination of types.

    upcast(t0, t1, ..., tn) -> T  where T is a supported dtype

    Parameters
    ----------
    *args
        One or more dtype-like objects (anything `np.result_type` accepts).
        They must be hashable, because results are memoized per argument
        tuple.

    Returns
    -------
    type
        The first entry of `supported_dtypes` that the NumPy promotion of
        `args` can be cast to.

    Raises
    ------
    TypeError
        If no entry of `supported_dtypes` can hold the promoted dtype.

    Examples
    --------
    >>> from scipy.sparse._sputils import upcast
    >>> upcast('int32')
    <class 'numpy.int32'>
    >>> upcast('bool')
    <class 'numpy.bool_'>
    >>> upcast('int32','float32')
    <class 'numpy.float64'>
    >>> upcast('bool',complex,float)
    <class 'numpy.complex128'>

    """
    # Key the memo on the argument tuple itself rather than on hash(args):
    # distinct tuples can share a hash value, and keying on the bare hash
    # would then hand back the dtype cached for a different set of arguments.
    cached = _upcast_memo.get(args)
    if cached is not None:
        return cached

    promoted = np.result_type(*args)

    for candidate in supported_dtypes:
        if np.can_cast(promoted, candidate):
            _upcast_memo[args] = candidate
            return candidate

    raise TypeError(f'no supported conversion for types: {args!r}')

56 

57 

def upcast_char(*args):
    """Like `upcast`, but the inputs are dtype character codes (faster).

    The result is memoized under the tuple of character codes, so repeated
    calls skip the conversion of each code to a dtype.
    """
    cached = _upcast_memo.get(args)
    if cached is None:
        cached = upcast(*(np.dtype(code) for code in args))
        _upcast_memo[args] = cached
    return cached

66 

67 

def upcast_scalar(dtype, scalar):
    """Return the dtype produced by multiplying a `dtype` array by `scalar`.

    The answer is obtained by actually performing the multiplication on a
    one-element array, so it follows NumPy's own promotion rules.
    """
    probe = np.array([0], dtype=dtype)
    product = probe * scalar
    return product.dtype

73 

74 

def downcast_intp_index(arr):
    """Convert an index array to ``np.intp`` when its dtype is wider.

    Arrays whose item size does not exceed that of ``np.intp`` are returned
    unchanged.  Wider arrays are converted with ``astype``; before converting
    a non-empty array its extreme values are checked.

    Raises
    ------
    ValueError
        If a value lies outside the range representable by ``np.intp``.
    """
    if arr.dtype.itemsize <= np.dtype(np.intp).itemsize:
        return arr

    # Empty arrays have no extrema to inspect, so they skip the range check.
    if arr.size != 0:
        largest = arr.max()
        smallest = arr.min()
        if largest > np.iinfo(np.intp).max or smallest < np.iinfo(np.intp).min:
            raise ValueError("Cannot deal with arrays with indices larger "
                             "than the machine maximum address size "
                             "(e.g. 64-bit indices on 32-bit machine).")
    return arr.astype(np.intp)

93 

94 

def to_native(A):
    """Return `A` with a native byte-order dtype.

    `A` must be a NumPy array.  When its dtype is already native, `A` itself
    is handed back (no `asarray()` call, so no extra view is created);
    otherwise the result is `A` converted to the native-order equivalent of
    its dtype.
    """
    dtype = A.dtype
    if not dtype.isnative:
        return np.asarray(A, dtype=dtype.newbyteorder('native'))
    return A

109 

110 

def getdtype(dtype, a=None, default=None):
    """Resolve the dtype to use from an explicit request, an object or a default.

    Resolution order:

    1. If `dtype` is given, it is converted with ``np.dtype``.
    2. Otherwise ``a.dtype`` is used when `a` has such an attribute.
    3. Otherwise `default` is converted with ``np.dtype``.

    Raises
    ------
    TypeError
        If `dtype` is None, `a` has no ``dtype`` attribute and `default`
        is None.
    ValueError
        If the resolved dtype is the object dtype.
    """
    # TODO is this really what we want?
    if dtype is not None:
        resolved = np.dtype(dtype)
    else:
        try:
            resolved = a.dtype
        except AttributeError as exc:
            if default is None:
                raise TypeError("could not interpret data type") from exc
            resolved = np.dtype(default)

    if resolved == np.object_:
        raise ValueError(
            "object dtype is not supported by sparse matrices"
        )
    return resolved

135 

136 

def getdata(obj, dtype=None, copy=False) -> np.ndarray:
    """Build an array from `obj` and reject unsupported dtypes.

    Equivalent to ``np.array(obj, dtype=dtype, copy=copy)`` followed by a
    `getdtype` check on the dtype of the result; `getdtype` raises for dtypes
    that sparse containers do not accept (e.g. the object dtype).
    """
    result = np.array(obj, dtype=dtype, copy=copy)
    # Only the validation side effect of getdtype is wanted here; the value
    # it returns is deliberately discarded.
    getdtype(result.dtype)
    return result

147 

148 

def get_index_dtype(arrays=(), maxval=None, check_contents=False):
    """
    Choose an index dtype able to hold the given (integer) index arrays.

    Parameters
    ----------
    arrays : tuple of array_like
        Input arrays whose types/contents to check.  A single ndarray is
        treated as a one-element tuple.
    maxval : float, optional
        Maximum value needed
    check_contents : bool, optional
        Whether to check the values in the arrays and not just their types.
        Default: False (check only the types)

    Returns
    -------
    dtype : dtype
        Suitable index data type (int32 or int64)

    """
    int32_limits = np.iinfo(np.int32)
    lower = np.int32(int32_limits.min)
    upper = np.int32(int32_limits.max)

    # not using intc directly due to misinteractions with pythran
    result = np.int64 if np.intc().itemsize != 4 else np.int32
    if maxval is not None and np.int64(maxval) > upper:
        result = np.int64

    if isinstance(arrays, np.ndarray):
        arrays = (arrays,)

    def fits_int32(candidate):
        # True when `candidate` gives no reason to widen the index dtype.
        candidate = np.asarray(candidate)
        if np.can_cast(candidate.dtype, np.int32):
            return True
        if not check_contents:
            return False
        if candidate.size == 0:
            # a bigger type not needed
            return True
        if not np.issubdtype(candidate.dtype, np.integer):
            return False
        highest = candidate.max()
        lowest = candidate.min()
        return lowest >= lower and highest <= upper

    # `all` stops at the first array that needs the wider type.
    if not all(fits_int32(item) for item in arrays):
        result = np.int64

    return result

202 

203 

def get_sum_dtype(dtype: np.dtype) -> np.dtype:
    """Mimic numpy's casting for np.sum.

    Unsigned dtypes that can be cast to ``np.uint`` yield ``np.uint``; other
    dtypes that can be cast to ``np.int_`` yield ``np.int_``; anything else
    is returned unchanged.
    """
    is_unsigned = dtype.kind == 'u'
    if is_unsigned and np.can_cast(dtype, np.uint):
        return np.uint
    if np.can_cast(dtype, np.int_):
        return np.int_
    return dtype

211 

212 

def isscalarlike(x) -> bool:
    """Tell whether x is a scalar, an array scalar, or a 0-dim array."""
    if np.isscalar(x):
        return True
    # Not a scalar by NumPy's definition: accept only zero-dimensional
    # dense arrays.
    return isdense(x) and x.ndim == 0

216 

217 

def isintlike(x) -> bool:
    """Tell whether x can serve as an index into a sparse matrix.

    Returns True when ``operator.index`` accepts x, and False for values
    that are not scalars or not integer-valued.

    Raises
    ------
    ValueError
        If x is integer-valued but not an exact integer type (for example
        ``3.0``).
    """
    # Fast-path check to eliminate non-scalar values. operator.index would
    # catch this case too, but the exception catching is slow.
    if np.ndim(x) != 0:
        return False
    try:
        operator.index(x)
    except (TypeError, ValueError):
        try:
            equals_its_int = bool(int(x) == x)
        except (TypeError, ValueError):
            return False
        if equals_its_int:
            raise ValueError(
                "Inexact indices into sparse matrices are not allowed"
            )
        return False
    return True

238 

239 

def isshape(x, nonneg=False, *, allow_1d=False) -> bool:
    """Tell whether x is a valid tuple of dimensions.

    x must have length 2, or length 1 or 2 when `allow_1d` is set, and each
    entry must pass `isintlike`.  With `nonneg`, negative entries are
    rejected as well.
    """
    accepted_lengths = (1, 2) if allow_1d else (2,)
    if len(x) not in accepted_lengths:
        return False
    # Per entry: integer-likeness is tested first, the sign only afterwards.
    return all(
        isintlike(dim) and not (nonneg and dim < 0)
        for dim in x
    )

255 

256 

def issequence(t) -> bool:
    """Tell whether t looks like a flat (1-D) sequence.

    True for a list or tuple that is empty or whose first item is a scalar,
    and for a one-dimensional ndarray.
    """
    if isinstance(t, (list, tuple)):
        if len(t) == 0 or np.isscalar(t[0]):
            return True
    return isinstance(t, np.ndarray) and t.ndim == 1

261 

262 

def ismatrix(t) -> bool:
    """Tell whether t looks like a 2-D container.

    True for a non-empty list or tuple whose first item passes `issequence`,
    and for a two-dimensional ndarray.
    """
    if isinstance(t, (list, tuple)) and len(t) > 0 and issequence(t[0]):
        return True
    return isinstance(t, np.ndarray) and t.ndim == 2

267 

268 

def isdense(x) -> bool:
    """Tell whether x is a NumPy ndarray (or an instance of a subclass)."""
    return isinstance(x, np.ndarray)

271 

272 

def validateaxis(axis) -> None:
    """Validate an `axis` argument for a 2-D sparse container.

    ``None`` and integers in ``[-2, 1]`` are accepted.

    Raises
    ------
    TypeError
        If `axis` is a tuple, or is not of an integer type.
    ValueError
        If `axis` is an integer outside ``[-2, 1]``.
    """
    if axis is None:
        return

    kind = type(axis)

    # In NumPy, you can pass in tuples for 'axis', but they are
    # not very useful for sparse matrices given their limited
    # dimensions, so let's make it explicit that they are not
    # allowed to be passed in
    if kind is tuple:
        raise TypeError("Tuples are not accepted for the 'axis' parameter. "
                        "Please pass in one of the following: "
                        "{-2, -1, 0, 1, None}.")

    # If not a tuple, check that the provided axis is actually
    # an integer and raise a TypeError similar to NumPy's
    if not np.issubdtype(np.dtype(kind), np.integer):
        raise TypeError(f"axis must be an integer, not {kind.__name__}")

    if axis < -2 or axis > 1:
        raise ValueError("axis out of range")

294 

295 

def check_shape(args, current_shape=None, *, allow_1d=False) -> tuple[int, ...]:
    """Imitate numpy.matrix handling of shape arguments

    Parameters
    ----------
    args : array_like
        Data structures providing information about the shape of the sparse array.
        Either a single iterable of integers, a single integer, or several
        integers.
    current_shape : tuple, optional
        The current shape of the sparse array or matrix.
        If None (default), the current shape will be inferred from args.
        When given, `args` is treated as a reshape target: the total size
        must match and one dimension may be negative, in which case it is
        inferred from the remaining ones.
    allow_1d : bool, optional
        If True, then 1-D or 2-D arrays are accepted.
        If False (default), then only 2-D arrays are accepted and an error is
        raised otherwise.

    Returns
    -------
    new_shape: tuple
        The new shape after validation.

    Raises
    ------
    TypeError
        If `args` is empty or an entry cannot be used as an integer.
    ValueError
        If the shape has the wrong number of dimensions, contains negative
        entries where none are allowed, has more than one unknown dimension,
        or is incompatible with the size of `current_shape`.
    """
    if len(args) == 0:
        raise TypeError("function missing 1 required positional argument: "
                        "'shape'")
    if len(args) == 1:
        try:
            shape_iter = iter(args[0])
        except TypeError:
            new_shape = (operator.index(args[0]), )
        else:
            new_shape = tuple(operator.index(arg) for arg in shape_iter)
    else:
        new_shape = tuple(operator.index(arg) for arg in args)

    if current_shape is None:
        if allow_1d:
            if len(new_shape) not in (1, 2):
                raise ValueError('shape must be a 1- or 2-tuple of positive '
                                 'integers')
        elif len(new_shape) != 2:
            raise ValueError('shape must be a 2-tuple of positive integers')
        if any(d < 0 for d in new_shape):
            raise ValueError("'shape' elements cannot be negative")
    else:
        # Check the current size only if needed
        current_size = prod(current_shape)

        # Check for negatives
        negative_indexes = [i for i, x in enumerate(new_shape) if x < 0]
        if not negative_indexes:
            new_size = prod(new_shape)
            if new_size != current_size:
                raise ValueError('cannot reshape array of size {} into shape {}'
                                 .format(current_size, new_shape))
        elif len(negative_indexes) == 1:
            skip = negative_indexes[0]
            specified = prod(new_shape[:skip] + new_shape[skip+1:])
            # A zero among the explicit dimensions leaves the unknown one
            # undetermined; report it like any other impossible reshape
            # instead of letting the division below raise ZeroDivisionError.
            if specified == 0 or current_size % specified != 0:
                err_shape = tuple('newshape' if x < 0 else x for x in new_shape)
                raise ValueError('cannot reshape array of size {} into shape {}'
                                 ''.format(current_size, err_shape))
            unspecified = current_size // specified
            new_shape = new_shape[:skip] + (unspecified,) + new_shape[skip+1:]
        else:
            raise ValueError('can only specify one unknown dimension')

    if len(new_shape) != 2 and not (allow_1d and len(new_shape) == 1):
        raise ValueError('matrix shape must be two-dimensional')

    return new_shape

365 

366 

def check_reshape_kwargs(kwargs):
    """Extract the ``order`` and ``copy`` options of a reshape call.

    Both entries are popped from `kwargs` (so the mapping is modified in
    place); ``order`` defaults to ``'C'`` and ``copy`` to ``False``.

    Returns
    -------
    order, copy
        The two extracted values.

    Raises
    ------
    TypeError
        If any other keyword is left in `kwargs`.
    """
    order = kwargs.pop('order', 'C')
    copy = kwargs.pop('copy', False)
    if not kwargs:
        return order, copy

    # Anything still present was not a recognised reshape option.
    leftover = ', '.join(kwargs.keys())
    raise TypeError('reshape() got unexpected keywords arguments: {}'
                    .format(leftover))

382 

383 

def is_pydata_spmatrix(m) -> bool:
    """
    Tell whether `m` is a pydata/sparse array without importing that package.

    The ``sparse`` module is looked up in ``sys.modules`` only; when it has
    not been imported (or exposes no ``SparseArray``), the answer is False.
    """
    sparse_module = sys.modules.get('sparse')
    sparse_array_cls = getattr(sparse_module, 'SparseArray', None)
    if sparse_array_cls is None:
        return False
    return isinstance(m, sparse_array_cls)

390 

391 

def convert_pydata_sparse_to_scipy(
    arg: Any, target_format: Optional[Literal["csc", "csr"]] = None
) -> Union[Any, "sp.spmatrix"]:
    """
    Turn a pydata/sparse array into a scipy sparse matrix.

    Any other object is returned untouched.  A converted array is put into
    `target_format` when one is given; otherwise it is converted to CSC
    unless it is already in CSC or CSR format.
    """
    if not is_pydata_spmatrix(arg):
        return arg

    converted = arg.to_scipy_sparse()
    if target_format is not None:
        return converted.asformat(target_format)
    if converted.format not in ("csc", "csr"):
        return converted.tocsc()
    return converted

406 

407 

408############################################################################### 

409# Wrappers for NumPy types that are deprecated 

410 

411# Numpy versions of these functions raise deprecation warnings, the 

412# ones below do not. 

413 

def matrix(*args, **kwargs):
    """Build an ``np.matrix`` by viewing the result of ``np.array``.

    All arguments are forwarded to ``np.array``.
    """
    as_array = np.array(*args, **kwargs)
    return as_array.view(np.matrix)

416 

417 

def asmatrix(data, dtype=None):
    """Return `data` as an ``np.matrix``.

    An ``np.matrix`` input is returned as-is when no `dtype` is requested or
    its dtype already equals `dtype`; otherwise the result is an
    ``np.matrix`` view of ``np.asarray(data, dtype=dtype)``.
    """
    already_matrix = isinstance(data, np.matrix)
    if already_matrix and (dtype is None or data.dtype == dtype):
        return data
    as_array = np.asarray(data, dtype=dtype)
    return as_array.view(np.matrix)

422 

423############################################################################### 

424 

425 

def _todata(s) -> np.ndarray:
    """Return the stored values of a sparse array, summing duplicates if needed.

    Parameters
    ----------
    s : sparse array
        Input sparse array.

    Returns
    -------
    data: ndarray
        Nonzero values of the array, with shape (s.nnz,)

    """
    # Formats built on the shared data-matrix base expose the values directly.
    if isinstance(s, sp._data._data_matrix):
        return s._deduped_data()

    # DOK: gather the stored values into a flat array.
    if isinstance(s, sp.dok_array):
        return np.fromiter(s.values(), dtype=s.dtype, count=s.nnz)

    # LIL: flatten the per-row value lists into a preallocated buffer.
    if isinstance(s, sp.lil_array):
        flat = np.empty(s.nnz, dtype=s.dtype)
        sp._csparsetools.lil_flatten_to_array(s.data, flat)
        return flat

    # Anything else goes through COO.
    return s.tocoo()._deduped_data()