Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/optimize/_trustregion_exact.py: 10%

138 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1"""Nearly exact trust-region optimization subproblem.""" 

2import numpy as np 

3from scipy.linalg import (norm, get_lapack_funcs, solve_triangular, 

4 cho_solve) 

5from ._trustregion import (_minimize_trust_region, BaseQuadraticSubproblem) 

6 

7__all__ = ['_minimize_trustregion_exact', 

8 'estimate_smallest_singular_value', 

9 'singular_leading_submatrix', 

10 'IterativeSubproblem'] 

11 

12 

13def _minimize_trustregion_exact(fun, x0, args=(), jac=None, hess=None, 

14 **trust_region_options): 

15 """ 

16 Minimization of scalar function of one or more variables using 

17 a nearly exact trust-region algorithm. 

18 

19 Options 

20 ------- 

21 initial_tr_radius : float 

22 Initial trust-region radius. 

23 max_tr_radius : float 

24 Maximum value of the trust-region radius. No steps that are longer 

25 than this value will be proposed. 

26 eta : float 

27 Trust region related acceptance stringency for proposed steps. 

28 gtol : float 

29 Gradient norm must be less than ``gtol`` before successful 

30 termination. 

31 """ 

32 

33 if jac is None: 

34 raise ValueError('Jacobian is required for trust region ' 

35 'exact minimization.') 

36 if not callable(hess): 

37 raise ValueError('Hessian matrix is required for trust region ' 

38 'exact minimization.') 

39 return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess, 

40 subproblem=IterativeSubproblem, 

41 **trust_region_options) 

42 

43 

44def estimate_smallest_singular_value(U): 

45 """Given upper triangular matrix ``U`` estimate the smallest singular 

46 value and the correspondent right singular vector in O(n**2) operations. 

47 

48 Parameters 

49 ---------- 

50 U : ndarray 

51 Square upper triangular matrix. 

52 

53 Returns 

54 ------- 

55 s_min : float 

56 Estimated smallest singular value of the provided matrix. 

57 z_min : ndarray 

58 Estimatied right singular vector. 

59 

60 Notes 

61 ----- 

62 The procedure is based on [1]_ and is done in two steps. First, it finds 

63 a vector ``e`` with components selected from {+1, -1} such that the 

64 solution ``w`` from the system ``U.T w = e`` is as large as possible. 

65 Next it estimate ``U v = w``. The smallest singular value is close 

66 to ``norm(w)/norm(v)`` and the right singular vector is close 

67 to ``v/norm(v)``. 

68 

69 The estimation will be better more ill-conditioned is the matrix. 

70 

71 References 

72 ---------- 

73 .. [1] Cline, A. K., Moler, C. B., Stewart, G. W., Wilkinson, J. H. 

74 An estimate for the condition number of a matrix. 1979. 

75 SIAM Journal on Numerical Analysis, 16(2), 368-375. 

76 """ 

77 

78 U = np.atleast_2d(U) 

79 m, n = U.shape 

80 

81 if m != n: 

82 raise ValueError("A square triangular matrix should be provided.") 

83 

84 # A vector `e` with components selected from {+1, -1} 

85 # is selected so that the solution `w` to the system 

86 # `U.T w = e` is as large as possible. Implementation 

87 # based on algorithm 3.5.1, p. 142, from reference [2] 

88 # adapted for lower triangular matrix. 

89 

90 p = np.zeros(n) 

91 w = np.empty(n) 

92 

93 # Implemented according to: Golub, G. H., Van Loan, C. F. (2013). 

94 # "Matrix computations". Forth Edition. JHU press. pp. 140-142. 

95 for k in range(n): 

96 wp = (1-p[k]) / U.T[k, k] 

97 wm = (-1-p[k]) / U.T[k, k] 

98 pp = p[k+1:] + U.T[k+1:, k]*wp 

99 pm = p[k+1:] + U.T[k+1:, k]*wm 

100 

101 if abs(wp) + norm(pp, 1) >= abs(wm) + norm(pm, 1): 

102 w[k] = wp 

103 p[k+1:] = pp 

104 else: 

105 w[k] = wm 

106 p[k+1:] = pm 

107 

108 # The system `U v = w` is solved using backward substitution. 

109 v = solve_triangular(U, w) 

110 

111 v_norm = norm(v) 

112 w_norm = norm(w) 

113 

114 # Smallest singular value 

115 s_min = w_norm / v_norm 

116 

117 # Associated vector 

118 z_min = v / v_norm 

119 

120 return s_min, z_min 

121 

122 

123def gershgorin_bounds(H): 

124 """ 

125 Given a square matrix ``H`` compute upper 

126 and lower bounds for its eigenvalues (Gregoshgorin Bounds). 

127 Defined ref. [1]. 

128 

129 References 

130 ---------- 

131 .. [1] Conn, A. R., Gould, N. I., & Toint, P. L. 

132 Trust region methods. 2000. Siam. pp. 19. 

133 """ 

134 

135 H_diag = np.diag(H) 

136 H_diag_abs = np.abs(H_diag) 

137 H_row_sums = np.sum(np.abs(H), axis=1) 

138 lb = np.min(H_diag + H_diag_abs - H_row_sums) 

139 ub = np.max(H_diag - H_diag_abs + H_row_sums) 

140 

141 return lb, ub 

142 

143 

144def singular_leading_submatrix(A, U, k): 

145 """ 

146 Compute term that makes the leading ``k`` by ``k`` 

147 submatrix from ``A`` singular. 

148 

149 Parameters 

150 ---------- 

151 A : ndarray 

152 Symmetric matrix that is not positive definite. 

153 U : ndarray 

154 Upper triangular matrix resulting of an incomplete 

155 Cholesky decomposition of matrix ``A``. 

156 k : int 

157 Positive integer such that the leading k by k submatrix from 

158 `A` is the first non-positive definite leading submatrix. 

159 

160 Returns 

161 ------- 

162 delta : float 

163 Amount that should be added to the element (k, k) of the 

164 leading k by k submatrix of ``A`` to make it singular. 

165 v : ndarray 

166 A vector such that ``v.T B v = 0``. Where B is the matrix A after 

167 ``delta`` is added to its element (k, k). 

168 """ 

169 

170 # Compute delta 

171 delta = np.sum(U[:k-1, k-1]**2) - A[k-1, k-1] 

172 

173 n = len(A) 

174 

175 # Inicialize v 

176 v = np.zeros(n) 

177 v[k-1] = 1 

178 

179 # Compute the remaining values of v by solving a triangular system. 

180 if k != 1: 

181 v[:k-1] = solve_triangular(U[:k-1, :k-1], -U[:k-1, k-1]) 

182 

183 return delta, v 

184 

185 

186class IterativeSubproblem(BaseQuadraticSubproblem): 

187 """Quadratic subproblem solved by nearly exact iterative method. 

188 

189 Notes 

190 ----- 

191 This subproblem solver was based on [1]_, [2]_ and [3]_, 

192 which implement similar algorithms. The algorithm is basically 

193 that of [1]_ but ideas from [2]_ and [3]_ were also used. 

194 

195 References 

196 ---------- 

197 .. [1] A.R. Conn, N.I. Gould, and P.L. Toint, "Trust region methods", 

198 Siam, pp. 169-200, 2000. 

199 .. [2] J. Nocedal and S. Wright, "Numerical optimization", 

200 Springer Science & Business Media. pp. 83-91, 2006. 

201 .. [3] J.J. More and D.C. Sorensen, "Computing a trust region step", 

202 SIAM Journal on Scientific and Statistical Computing, vol. 4(3), 

203 pp. 553-572, 1983. 

204 """ 

205 

206 # UPDATE_COEFF appears in reference [1]_ 

207 # in formula 7.3.14 (p. 190) named as "theta". 

208 # As recommended there it value is fixed in 0.01. 

209 UPDATE_COEFF = 0.01 

210 

211 EPS = np.finfo(float).eps 

212 

213 def __init__(self, x, fun, jac, hess, hessp=None, 

214 k_easy=0.1, k_hard=0.2): 

215 

216 super().__init__(x, fun, jac, hess) 

217 

218 # When the trust-region shrinks in two consecutive 

219 # calculations (``tr_radius < previous_tr_radius``) 

220 # the lower bound ``lambda_lb`` may be reused, 

221 # facilitating the convergence. To indicate no 

222 # previous value is known at first ``previous_tr_radius`` 

223 # is set to -1 and ``lambda_lb`` to None. 

224 self.previous_tr_radius = -1 

225 self.lambda_lb = None 

226 

227 self.niter = 0 

228 

229 # ``k_easy`` and ``k_hard`` are parameters used 

230 # to determine the stop criteria to the iterative 

231 # subproblem solver. Take a look at pp. 194-197 

232 # from reference _[1] for a more detailed description. 

233 self.k_easy = k_easy 

234 self.k_hard = k_hard 

235 

236 # Get Lapack function for cholesky decomposition. 

237 # The implemented SciPy wrapper does not return 

238 # the incomplete factorization needed by the method. 

239 self.cholesky, = get_lapack_funcs(('potrf',), (self.hess,)) 

240 

241 # Get info about Hessian 

242 self.dimension = len(self.hess) 

243 self.hess_gershgorin_lb,\ 

244 self.hess_gershgorin_ub = gershgorin_bounds(self.hess) 

245 self.hess_inf = norm(self.hess, np.Inf) 

246 self.hess_fro = norm(self.hess, 'fro') 

247 

248 # A constant such that for vectors smaler than that 

249 # backward substituition is not reliable. It was stabilished 

250 # based on Golub, G. H., Van Loan, C. F. (2013). 

251 # "Matrix computations". Forth Edition. JHU press., p.165. 

252 self.CLOSE_TO_ZERO = self.dimension * self.EPS * self.hess_inf 

253 

254 def _initial_values(self, tr_radius): 

255 """Given a trust radius, return a good initial guess for 

256 the damping factor, the lower bound and the upper bound. 

257 The values were chosen accordingly to the guidelines on 

258 section 7.3.8 (p. 192) from [1]_. 

259 """ 

260 

261 # Upper bound for the damping factor 

262 lambda_ub = max(0, self.jac_mag/tr_radius + min(-self.hess_gershgorin_lb, 

263 self.hess_fro, 

264 self.hess_inf)) 

265 

266 # Lower bound for the damping factor 

267 lambda_lb = max(0, -min(self.hess.diagonal()), 

268 self.jac_mag/tr_radius - min(self.hess_gershgorin_ub, 

269 self.hess_fro, 

270 self.hess_inf)) 

271 

272 # Improve bounds with previous info 

273 if tr_radius < self.previous_tr_radius: 

274 lambda_lb = max(self.lambda_lb, lambda_lb) 

275 

276 # Initial guess for the damping factor 

277 if lambda_lb == 0: 

278 lambda_initial = 0 

279 else: 

280 lambda_initial = max(np.sqrt(lambda_lb * lambda_ub), 

281 lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) 

282 

283 return lambda_initial, lambda_lb, lambda_ub 

284 

285 def solve(self, tr_radius): 

286 """Solve quadratic subproblem""" 

287 

288 lambda_current, lambda_lb, lambda_ub = self._initial_values(tr_radius) 

289 n = self.dimension 

290 hits_boundary = True 

291 already_factorized = False 

292 self.niter = 0 

293 

294 while True: 

295 

296 # Compute Cholesky factorization 

297 if already_factorized: 

298 already_factorized = False 

299 else: 

300 H = self.hess+lambda_current*np.eye(n) 

301 U, info = self.cholesky(H, lower=False, 

302 overwrite_a=False, 

303 clean=True) 

304 

305 self.niter += 1 

306 

307 # Check if factorization succeeded 

308 if info == 0 and self.jac_mag > self.CLOSE_TO_ZERO: 

309 # Successful factorization 

310 

311 # Solve `U.T U p = s` 

312 p = cho_solve((U, False), -self.jac) 

313 

314 p_norm = norm(p) 

315 

316 # Check for interior convergence 

317 if p_norm <= tr_radius and lambda_current == 0: 

318 hits_boundary = False 

319 break 

320 

321 # Solve `U.T w = p` 

322 w = solve_triangular(U, p, trans='T') 

323 

324 w_norm = norm(w) 

325 

326 # Compute Newton step accordingly to 

327 # formula (4.44) p.87 from ref [2]_. 

328 delta_lambda = (p_norm/w_norm)**2 * (p_norm-tr_radius)/tr_radius 

329 lambda_new = lambda_current + delta_lambda 

330 

331 if p_norm < tr_radius: # Inside boundary 

332 s_min, z_min = estimate_smallest_singular_value(U) 

333 

334 ta, tb = self.get_boundaries_intersections(p, z_min, 

335 tr_radius) 

336 

337 # Choose `step_len` with the smallest magnitude. 

338 # The reason for this choice is explained at 

339 # ref [3]_, p. 6 (Immediately before the formula 

340 # for `tau`). 

341 step_len = min([ta, tb], key=abs) 

342 

343 # Compute the quadratic term (p.T*H*p) 

344 quadratic_term = np.dot(p, np.dot(H, p)) 

345 

346 # Check stop criteria 

347 relative_error = (step_len**2 * s_min**2) / (quadratic_term + lambda_current*tr_radius**2) 

348 if relative_error <= self.k_hard: 

349 p += step_len * z_min 

350 break 

351 

352 # Update uncertanty bounds 

353 lambda_ub = lambda_current 

354 lambda_lb = max(lambda_lb, lambda_current - s_min**2) 

355 

356 # Compute Cholesky factorization 

357 H = self.hess + lambda_new*np.eye(n) 

358 c, info = self.cholesky(H, lower=False, 

359 overwrite_a=False, 

360 clean=True) 

361 

362 # Check if the factorization have succeeded 

363 # 

364 if info == 0: # Successful factorization 

365 # Update damping factor 

366 lambda_current = lambda_new 

367 already_factorized = True 

368 else: # Unsuccessful factorization 

369 # Update uncertanty bounds 

370 lambda_lb = max(lambda_lb, lambda_new) 

371 

372 # Update damping factor 

373 lambda_current = max(np.sqrt(lambda_lb * lambda_ub), 

374 lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) 

375 

376 else: # Outside boundary 

377 # Check stop criteria 

378 relative_error = abs(p_norm - tr_radius) / tr_radius 

379 if relative_error <= self.k_easy: 

380 break 

381 

382 # Update uncertanty bounds 

383 lambda_lb = lambda_current 

384 

385 # Update damping factor 

386 lambda_current = lambda_new 

387 

388 elif info == 0 and self.jac_mag <= self.CLOSE_TO_ZERO: 

389 # jac_mag very close to zero 

390 

391 # Check for interior convergence 

392 if lambda_current == 0: 

393 p = np.zeros(n) 

394 hits_boundary = False 

395 break 

396 

397 s_min, z_min = estimate_smallest_singular_value(U) 

398 step_len = tr_radius 

399 

400 # Check stop criteria 

401 if step_len**2 * s_min**2 <= self.k_hard * lambda_current * tr_radius**2: 

402 p = step_len * z_min 

403 break 

404 

405 # Update uncertanty bounds 

406 lambda_ub = lambda_current 

407 lambda_lb = max(lambda_lb, lambda_current - s_min**2) 

408 

409 # Update damping factor 

410 lambda_current = max(np.sqrt(lambda_lb * lambda_ub), 

411 lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) 

412 

413 else: # Unsuccessful factorization 

414 

415 # Compute auxiliary terms 

416 delta, v = singular_leading_submatrix(H, U, info) 

417 v_norm = norm(v) 

418 

419 # Update uncertanty interval 

420 lambda_lb = max(lambda_lb, lambda_current + delta/v_norm**2) 

421 

422 # Update damping factor 

423 lambda_current = max(np.sqrt(lambda_lb * lambda_ub), 

424 lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) 

425 

426 self.lambda_lb = lambda_lb 

427 self.lambda_current = lambda_current 

428 self.previous_tr_radius = tr_radius 

429 

430 return p, hits_boundary