Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/optimize/_trustregion_exact.py: 10% of 138 statements (coverage.py v7.3.2, created at 2023-12-12 06:31 +0000)
1"""Nearly exact trust-region optimization subproblem."""
2import numpy as np
3from scipy.linalg import (norm, get_lapack_funcs, solve_triangular,
4 cho_solve)
5from ._trustregion import (_minimize_trust_region, BaseQuadraticSubproblem)
7__all__ = ['_minimize_trustregion_exact',
8 'estimate_smallest_singular_value',
9 'singular_leading_submatrix',
10 'IterativeSubproblem']


def _minimize_trustregion_exact(fun, x0, args=(), jac=None, hess=None,
                                **trust_region_options):
    """
    Minimization of a scalar function of one or more variables using
    a nearly exact trust-region algorithm.

    Options
    -------
    initial_tr_radius : float
        Initial trust-region radius.
    max_tr_radius : float
        Maximum value of the trust-region radius. No steps that are longer
        than this value will be proposed.
    eta : float
        Trust region related acceptance stringency for proposed steps.
    gtol : float
        Gradient norm must be less than ``gtol`` before successful
        termination.
    """

    if jac is None:
        raise ValueError('Jacobian is required for trust region '
                         'exact minimization.')
    if not callable(hess):
        raise ValueError('Hessian matrix is required for trust region '
                         'exact minimization.')
    return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess,
                                  subproblem=IterativeSubproblem,
                                  **trust_region_options)
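

# Illustrative usage sketch (not part of the original SciPy module). The solver
# above is normally reached through ``scipy.optimize.minimize`` with
# ``method='trust-exact'``; ``_demo_trust_exact`` is a hypothetical name used
# only here. The import is done inside the function to avoid a circular import
# with ``scipy.optimize``.
def _demo_trust_exact():
    from scipy.optimize import minimize, rosen, rosen_der, rosen_hess

    x0 = np.array([-1.2, 1.0])
    res = minimize(rosen, x0, method='trust-exact',
                   jac=rosen_der, hess=rosen_hess,
                   options={'gtol': 1e-8})
    return res.x  # expected to be close to [1., 1.]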


def estimate_smallest_singular_value(U):
    """Given an upper triangular matrix ``U``, estimate the smallest singular
    value and the corresponding right singular vector in O(n**2) operations.

    Parameters
    ----------
    U : ndarray
        Square upper triangular matrix.

    Returns
    -------
    s_min : float
        Estimated smallest singular value of the provided matrix.
    z_min : ndarray
        Estimated right singular vector.

    Notes
    -----
    The procedure is based on [1]_ and is done in two steps. First, it finds
    a vector ``e`` with components selected from {+1, -1} such that the
    solution ``w`` of the system ``U.T w = e`` is as large as possible.
    Next it solves ``U v = w``. The smallest singular value is close
    to ``norm(w)/norm(v)`` and the right singular vector is close
    to ``v/norm(v)``.

    The more ill-conditioned the matrix is, the better the estimate becomes.

    References
    ----------
    .. [1] Cline, A. K., Moler, C. B., Stewart, G. W., Wilkinson, J. H.
           An estimate for the condition number of a matrix. 1979.
           SIAM Journal on Numerical Analysis, 16(2), 368-375.
    """

    U = np.atleast_2d(U)
    m, n = U.shape

    if m != n:
        raise ValueError("A square triangular matrix should be provided.")

    # A vector `e` with components selected from {+1, -1}
    # is chosen so that the solution `w` of the system
    # `U.T w = e` is as large as possible. Implementation
    # based on algorithm 3.5.1, p. 142, from reference [2],
    # adapted for a lower triangular matrix.

    p = np.zeros(n)
    w = np.empty(n)

    # Implemented according to reference [2]: Golub, G. H., Van Loan, C. F.
    # (2013). "Matrix Computations". Fourth Edition. JHU Press. pp. 140-142.
    for k in range(n):
        wp = (1-p[k]) / U.T[k, k]
        wm = (-1-p[k]) / U.T[k, k]
        pp = p[k+1:] + U.T[k+1:, k]*wp
        pm = p[k+1:] + U.T[k+1:, k]*wm

        if abs(wp) + norm(pp, 1) >= abs(wm) + norm(pm, 1):
            w[k] = wp
            p[k+1:] = pp
        else:
            w[k] = wm
            p[k+1:] = pm

    # The system `U v = w` is solved using backward substitution.
    v = solve_triangular(U, w)

    v_norm = norm(v)
    w_norm = norm(w)

    # Smallest singular value
    s_min = w_norm / v_norm

    # Associated right singular vector
    z_min = v / v_norm

    return s_min, z_min
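

# Illustrative sketch (not part of the original SciPy module): compare the
# O(n**2) estimate with a dense SVD on a hypothetical ill-conditioned upper
# triangular matrix; ``_demo_estimate_smallest_singular_value`` is a
# hypothetical name used only here.
def _demo_estimate_smallest_singular_value():
    rng = np.random.default_rng(0)
    U = np.triu(rng.standard_normal((6, 6)))
    U[3, 3] = 1e-8  # make the matrix ill-conditioned

    s_min, z_min = estimate_smallest_singular_value(U)
    s_exact = np.linalg.svd(U, compute_uv=False)[-1]
    # The two values should have the same order of magnitude.
    return s_min, s_exact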


def gershgorin_bounds(H):
    """
    Given a square matrix ``H``, compute upper
    and lower bounds for its eigenvalues (Gershgorin bounds).
    Defined in ref. [1].

    References
    ----------
    .. [1] Conn, A. R., Gould, N. I., & Toint, P. L.
           Trust region methods. 2000. Siam. pp. 19.
    """

    H_diag = np.diag(H)
    H_diag_abs = np.abs(H_diag)
    H_row_sums = np.sum(np.abs(H), axis=1)
    lb = np.min(H_diag + H_diag_abs - H_row_sums)
    ub = np.max(H_diag - H_diag_abs + H_row_sums)

    return lb, ub
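

# Illustrative sketch (not part of the original SciPy module): the Gershgorin
# bounds always bracket the spectrum of a symmetric matrix;
# ``_demo_gershgorin_bounds`` is a hypothetical name used only for this check.
def _demo_gershgorin_bounds():
    rng = np.random.default_rng(0)
    B = rng.standard_normal((5, 5))
    H = B + B.T  # symmetric test matrix

    lb, ub = gershgorin_bounds(H)
    eigs = np.linalg.eigvalsh(H)
    return lb <= eigs.min() and eigs.max() <= ub  # expected to be True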


def singular_leading_submatrix(A, U, k):
    """
    Compute the term that makes the leading ``k`` by ``k``
    submatrix of ``A`` singular.

    Parameters
    ----------
    A : ndarray
        Symmetric matrix that is not positive definite.
    U : ndarray
        Upper triangular matrix resulting from an incomplete
        Cholesky decomposition of matrix ``A``.
    k : int
        Positive integer such that the leading k by k submatrix of
        ``A`` is the first non-positive-definite leading submatrix.

    Returns
    -------
    delta : float
        Amount that should be added to the element (k, k) of the
        leading k by k submatrix of ``A`` to make it singular.
    v : ndarray
        A vector such that ``v.T B v = 0``, where ``B`` is the matrix ``A``
        after ``delta`` is added to its element (k, k).
    """

    # Compute delta
    delta = np.sum(U[:k-1, k-1]**2) - A[k-1, k-1]

    n = len(A)

    # Initialize v
    v = np.zeros(n)
    v[k-1] = 1

    # Compute the remaining values of v by solving a triangular system.
    if k != 1:
        v[:k-1] = solve_triangular(U[:k-1, :k-1], -U[:k-1, k-1])

    return delta, v
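

# Illustrative sketch (not part of the original SciPy module): build a
# hypothetical symmetric matrix whose leading 2x2 minor is indefinite, take the
# partial factor returned by the LAPACK ``potrf`` routine (used the same way as
# in ``IterativeSubproblem`` below) and check that ``v.T B v`` vanishes after
# the correction ``delta`` is applied; ``_demo_singular_leading_submatrix`` is
# a hypothetical name.
def _demo_singular_leading_submatrix():
    A = np.array([[1., 2., 0.],
                  [2., 1., 0.],
                  [0., 0., 3.]])

    potrf, = get_lapack_funcs(('potrf',), (A,))
    U, info = potrf(A, lower=False, overwrite_a=False, clean=True)
    # info == 2: the leading 2x2 submatrix is the first one that is
    # not positive definite.

    delta, v = singular_leading_submatrix(A, U, info)
    B = A.copy()
    B[info - 1, info - 1] += delta
    return v @ B @ v  # expected to be approximately 0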


class IterativeSubproblem(BaseQuadraticSubproblem):
    """Quadratic subproblem solved by a nearly exact iterative method.

    Notes
    -----
    This subproblem solver is based on [1]_, [2]_ and [3]_,
    which implement similar algorithms. The algorithm is basically
    that of [1]_, but ideas from [2]_ and [3]_ were also used.

    References
    ----------
    .. [1] A.R. Conn, N.I. Gould, and P.L. Toint, "Trust region methods",
           Siam, pp. 169-200, 2000.
    .. [2] J. Nocedal and S. Wright, "Numerical optimization",
           Springer Science & Business Media, pp. 83-91, 2006.
    .. [3] J.J. More and D.C. Sorensen, "Computing a trust region step",
           SIAM Journal on Scientific and Statistical Computing, vol. 4(3),
           pp. 553-572, 1983.
    """

    # UPDATE_COEFF appears in reference [1]_
    # in formula 7.3.14 (p. 190), where it is called "theta".
    # As recommended there, its value is fixed at 0.01.
    UPDATE_COEFF = 0.01

    EPS = np.finfo(float).eps

    def __init__(self, x, fun, jac, hess, hessp=None,
                 k_easy=0.1, k_hard=0.2):

        super().__init__(x, fun, jac, hess)

        # When the trust region shrinks in two consecutive
        # calculations (``tr_radius < previous_tr_radius``)
        # the lower bound ``lambda_lb`` may be reused,
        # speeding up convergence. To indicate that no
        # previous value is known, ``previous_tr_radius``
        # is initially set to -1 and ``lambda_lb`` to None.
        self.previous_tr_radius = -1
        self.lambda_lb = None

        self.niter = 0

        # ``k_easy`` and ``k_hard`` are parameters used
        # to determine the stopping criteria of the iterative
        # subproblem solver. See pp. 194-197 of
        # reference [1]_ for a more detailed description.
        self.k_easy = k_easy
        self.k_hard = k_hard

        # Get the LAPACK function for Cholesky decomposition.
        # The higher-level SciPy wrapper does not return
        # the incomplete factorization needed by the method.
        self.cholesky, = get_lapack_funcs(('potrf',), (self.hess,))

        # Get info about the Hessian
        self.dimension = len(self.hess)
        self.hess_gershgorin_lb,\
            self.hess_gershgorin_ub = gershgorin_bounds(self.hess)
        self.hess_inf = norm(self.hess, np.inf)
        self.hess_fro = norm(self.hess, 'fro')

        # A constant such that backward substitution is not reliable
        # for vectors smaller than it. It was established based on
        # Golub, G. H., Van Loan, C. F. (2013). "Matrix Computations".
        # Fourth Edition. JHU Press, p. 165.
        self.CLOSE_TO_ZERO = self.dimension * self.EPS * self.hess_inf

    def _initial_values(self, tr_radius):
        """Given a trust radius, return a good initial guess for
        the damping factor, the lower bound and the upper bound.
        The values are chosen according to the guidelines in
        section 7.3.8 (p. 192) of [1]_.
        """

        # Upper bound for the damping factor
        lambda_ub = max(0, self.jac_mag/tr_radius + min(-self.hess_gershgorin_lb,
                                                        self.hess_fro,
                                                        self.hess_inf))

        # Lower bound for the damping factor
        lambda_lb = max(0, -min(self.hess.diagonal()),
                        self.jac_mag/tr_radius - min(self.hess_gershgorin_ub,
                                                     self.hess_fro,
                                                     self.hess_inf))

        # Improve bounds with previous info
        if tr_radius < self.previous_tr_radius:
            lambda_lb = max(self.lambda_lb, lambda_lb)

        # Initial guess for the damping factor
        if lambda_lb == 0:
            lambda_initial = 0
        else:
            lambda_initial = max(np.sqrt(lambda_lb * lambda_ub),
                                 lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))

        return lambda_initial, lambda_lb, lambda_ub

    def solve(self, tr_radius):
        """Solve the quadratic subproblem."""

        lambda_current, lambda_lb, lambda_ub = self._initial_values(tr_radius)
        n = self.dimension
        hits_boundary = True
        already_factorized = False
        self.niter = 0

        while True:

            # Compute the Cholesky factorization
            if already_factorized:
                already_factorized = False
            else:
                H = self.hess + lambda_current*np.eye(n)
                U, info = self.cholesky(H, lower=False,
                                        overwrite_a=False,
                                        clean=True)

            self.niter += 1

            # Check if the factorization succeeded
            if info == 0 and self.jac_mag > self.CLOSE_TO_ZERO:
                # Successful factorization

                # Solve `(U.T U) p = -jac`
                p = cho_solve((U, False), -self.jac)

                p_norm = norm(p)

                # Check for interior convergence
                if p_norm <= tr_radius and lambda_current == 0:
                    hits_boundary = False
                    break

                # Solve `U.T w = p`
                w = solve_triangular(U, p, trans='T')

                w_norm = norm(w)

                # Compute the Newton step according to
                # formula (4.44), p. 87, from ref [2]_.
                delta_lambda = (p_norm/w_norm)**2 * (p_norm-tr_radius)/tr_radius
                lambda_new = lambda_current + delta_lambda

                if p_norm < tr_radius:  # Inside boundary
                    s_min, z_min = estimate_smallest_singular_value(U)

                    ta, tb = self.get_boundaries_intersections(p, z_min,
                                                               tr_radius)

                    # Choose `step_len` with the smallest magnitude.
                    # The reason for this choice is explained in
                    # ref [3]_, p. 6 (immediately before the formula
                    # for `tau`).
                    step_len = min([ta, tb], key=abs)

                    # Compute the quadratic term (p.T*H*p)
                    quadratic_term = np.dot(p, np.dot(H, p))

                    # Check stop criteria
                    relative_error = (step_len**2 * s_min**2) / (quadratic_term + lambda_current*tr_radius**2)
                    if relative_error <= self.k_hard:
                        p += step_len * z_min
                        break

                    # Update uncertainty bounds
                    lambda_ub = lambda_current
                    lambda_lb = max(lambda_lb, lambda_current - s_min**2)

                    # Compute the Cholesky factorization
                    H = self.hess + lambda_new*np.eye(n)
                    c, info = self.cholesky(H, lower=False,
                                            overwrite_a=False,
                                            clean=True)

                    # Check if the factorization has succeeded
                    if info == 0:  # Successful factorization
                        # Update the damping factor
                        lambda_current = lambda_new
                        already_factorized = True
                    else:  # Unsuccessful factorization
                        # Update uncertainty bounds
                        lambda_lb = max(lambda_lb, lambda_new)

                        # Update the damping factor
                        lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
                                             lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))

                else:  # Outside boundary
                    # Check stop criteria
                    relative_error = abs(p_norm - tr_radius) / tr_radius
                    if relative_error <= self.k_easy:
                        break

                    # Update uncertainty bounds
                    lambda_lb = lambda_current

                    # Update the damping factor
                    lambda_current = lambda_new

            elif info == 0 and self.jac_mag <= self.CLOSE_TO_ZERO:
                # jac_mag very close to zero

                # Check for interior convergence
                if lambda_current == 0:
                    p = np.zeros(n)
                    hits_boundary = False
                    break

                s_min, z_min = estimate_smallest_singular_value(U)
                step_len = tr_radius

                # Check stop criteria
                if step_len**2 * s_min**2 <= self.k_hard * lambda_current * tr_radius**2:
                    p = step_len * z_min
                    break

                # Update uncertainty bounds
                lambda_ub = lambda_current
                lambda_lb = max(lambda_lb, lambda_current - s_min**2)

                # Update the damping factor
                lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
                                     lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))

            else:  # Unsuccessful factorization

                # Compute auxiliary terms
                delta, v = singular_leading_submatrix(H, U, info)
                v_norm = norm(v)

                # Update the uncertainty interval
                lambda_lb = max(lambda_lb, lambda_current + delta/v_norm**2)

                # Update the damping factor
                lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
                                     lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))

        self.lambda_lb = lambda_lb
        self.lambda_current = lambda_current
        self.previous_tr_radius = tr_radius

        return p, hits_boundary
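

# Illustrative sketch (not part of the original SciPy module): solve a small
# hypothetical quadratic model directly with ``IterativeSubproblem``. The model
# has an indefinite Hessian, so the unconstrained problem is unbounded below
# and the returned step is expected to lie on the trust-region boundary;
# ``_demo_iterative_subproblem`` is a hypothetical name.
def _demo_iterative_subproblem():
    H = np.array([[1., 0.],
                  [0., -2.]])
    g = np.array([1., 1.])

    subproblem = IterativeSubproblem(x=np.zeros(2),
                                     fun=lambda x: 0.5 * x @ H @ x + g @ x,
                                     jac=lambda x: H @ x + g,
                                     hess=lambda x: H)
    p, hits_boundary = subproblem.solve(tr_radius=1.0)
    return p, hits_boundary  # hits_boundary is expected to be True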