
""" Sketching-based Matrix Computations """

# Author: Jordi Montes <jomsdev@gmail.com>
# August 28, 2017

import numpy as np

from scipy._lib._util import check_random_state, rng_integers
from scipy.sparse import csc_matrix

__all__ = ['clarkson_woodruff_transform']


def cwt_matrix(n_rows, n_columns, seed=None):
    r"""
    Generate a matrix S which represents a Clarkson-Woodruff transform.

    Given the desired size of matrix, the method returns a matrix S of size
    (n_rows, n_columns) where each column has all the entries set to 0
    except for one position which has been randomly set to +1 or -1 with
    equal probability.

    Parameters
    ----------
    n_rows : int
        Number of rows of S
    n_columns : int
        Number of columns of S
    seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.

    Returns
    -------
    S : (n_rows, n_columns) csc_matrix
        The returned matrix has ``n_columns`` nonzero entries.

    Notes
    -----
    Given a matrix A, with probability at least 9/10,

    .. math:: \|SA\| = (1 \pm \epsilon)\|A\|

    where the error epsilon is related to the size of S.
    """
    rng = check_random_state(seed)
    rows = rng_integers(rng, 0, n_rows, n_columns)
    cols = np.arange(n_columns + 1)
    signs = rng.choice([1, -1], n_columns)
    S = csc_matrix((signs, rows, cols), shape=(n_rows, n_columns))
    return S
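A small dense illustration (not the module's implementation, which stays sparse) of the structure `cwt_matrix` produces: every column holds exactly one entry, randomly placed and randomly signed +1 or -1. The variable names here are for illustration only.

```python
import numpy as np

# Build a tiny dense CountSketch-style matrix the same way cwt_matrix does:
# pick one random row per column and assign it a random +/-1 sign.
rng = np.random.default_rng(0)
n_rows, n_columns = 5, 12
rows = rng.integers(0, n_rows, size=n_columns)   # one random row per column
signs = rng.choice([1, -1], size=n_columns)      # random +/-1 per column
S = np.zeros((n_rows, n_columns))
S[rows, np.arange(n_columns)] = signs

# Each column has exactly one nonzero entry, and it is +/-1.
nonzeros_per_column = (S != 0).sum(axis=0)
```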


def clarkson_woodruff_transform(input_matrix, sketch_size, seed=None):
    r"""
    Applies a Clarkson-Woodruff Transform/sketch to the input matrix.

    Given an input_matrix ``A`` of size ``(n, d)``, compute a matrix ``A'`` of
    size (sketch_size, d) so that

    .. math:: \|Ax\| \approx \|A'x\|

    with high probability via the Clarkson-Woodruff Transform, otherwise
    known as the CountSketch matrix.

    Parameters
    ----------
    input_matrix : array_like
        Input matrix, of shape ``(n, d)``.
    sketch_size : int
        Number of rows for the sketch.
    seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.

    Returns
    -------
    A' : array_like
        Sketch of the input matrix ``A``, of size ``(sketch_size, d)``.

    Notes
    -----
    To make the statement

    .. math:: \|Ax\| \approx \|A'x\|

    precise, observe the following result which is adapted from the
    proof of Theorem 14 of [2]_ via Markov's inequality. If we have
    a sketch size ``sketch_size=k`` which is at least

    .. math:: k \geq \frac{2}{\epsilon^2\delta}

    then for any fixed vector ``x``,

    .. math:: \|Ax\| = (1\pm\epsilon)\|A'x\|

    with probability at least :math:`1 - \delta`.

    This implementation takes advantage of sparsity: computing
    a sketch takes time proportional to ``A.nnz``. Data ``A`` which
    is in ``scipy.sparse.csc_matrix`` format gives the quickest
    computation time for sparse input.

    >>> import numpy as np
    >>> from scipy import linalg
    >>> from scipy import sparse
    >>> rng = np.random.default_rng()
    >>> n_rows, n_columns, density, sketch_n_rows = 15000, 100, 0.01, 200
    >>> A = sparse.rand(n_rows, n_columns, density=density, format='csc')
    >>> B = sparse.rand(n_rows, n_columns, density=density, format='csr')
    >>> C = sparse.rand(n_rows, n_columns, density=density, format='coo')
    >>> D = rng.standard_normal((n_rows, n_columns))
    >>> SA = linalg.clarkson_woodruff_transform(A, sketch_n_rows)  # fastest
    >>> SB = linalg.clarkson_woodruff_transform(B, sketch_n_rows)  # fast
    >>> SC = linalg.clarkson_woodruff_transform(C, sketch_n_rows)  # slower
    >>> SD = linalg.clarkson_woodruff_transform(D, sketch_n_rows)  # slowest

    That said, this method does perform well on dense inputs, just slower
    on a relative scale.

    References
    ----------
    .. [1] Kenneth L. Clarkson and David P. Woodruff. Low rank approximation
           and regression in input sparsity time. In STOC, 2013.
    .. [2] David P. Woodruff. Sketching as a tool for numerical linear algebra.
           In Foundations and Trends in Theoretical Computer Science, 2014.

    Examples
    --------
    Create a big dense matrix ``A`` for the example:

    >>> import numpy as np
    >>> from scipy import linalg
    >>> n_rows, n_columns = 15000, 100
    >>> rng = np.random.default_rng()
    >>> A = rng.standard_normal((n_rows, n_columns))

    Apply the transform to create a new matrix with 200 rows:

    >>> sketch_n_rows = 200
    >>> sketch = linalg.clarkson_woodruff_transform(A, sketch_n_rows, seed=rng)
    >>> sketch.shape
    (200, 100)

    Now with high probability, the true norm is close to the sketched norm
    in absolute value.

    >>> linalg.norm(A)
    1224.2812927123198
    >>> linalg.norm(sketch)
    1226.518328407333

    Similarly, applying our sketch preserves the solution to a linear
    regression of :math:`\min \|Ax - b\|`.

    >>> b = rng.standard_normal(n_rows)
    >>> x = linalg.lstsq(A, b)[0]
    >>> Ab = np.hstack((A, b.reshape(-1, 1)))
    >>> SAb = linalg.clarkson_woodruff_transform(Ab, sketch_n_rows, seed=rng)
    >>> SA, Sb = SAb[:, :-1], SAb[:, -1]
    >>> x_sketched = linalg.lstsq(SA, Sb)[0]

    As with the matrix norm example, ``linalg.norm(A @ x - b)`` is close
    to ``linalg.norm(A @ x_sketched - b)`` with high probability.

    >>> linalg.norm(A @ x - b)
    122.83242365433877
    >>> linalg.norm(A @ x_sketched - b)
    166.58473879945151

    """
    S = cwt_matrix(sketch_size, input_matrix.shape[0], seed)
    return S.dot(input_matrix)
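A quick end-to-end check of the norm-preservation claim, sketched here with plain NumPy rather than the module's sparse path: a CountSketch built the same way as `cwt_matrix` is applied to a dense matrix without ever materializing ``S``, and the Frobenius norms are compared. The sizes and the `np.add.at` trick are illustrative choices, not part of the module.

```python
import numpy as np

# Apply a CountSketch S (k x n) to A (n x d) implicitly: each row of A is
# hashed to one of k buckets with a random sign, and buckets are summed.
rng = np.random.default_rng(1)
n, d, k = 2000, 20, 400
A = rng.standard_normal((n, d))
rows = rng.integers(0, k, size=n)        # bucket assignment for each row of A
signs = rng.choice([1.0, -1.0], size=n)  # random sign for each row of A
SA = np.zeros((k, d))
np.add.at(SA, rows, signs[:, None] * A)  # accumulates S @ A row by row

# With k = 400 the sketched norm should land close to the true norm.
rel_err = abs(np.linalg.norm(SA) - np.linalg.norm(A)) / np.linalg.norm(A)
```

This is the same computation as ``cwt_matrix(k, n).dot(A)``, just expressed with scatter-adds; the sparse matrix product in the module is the efficient general-purpose form.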