1"""
2Dogleg algorithm with rectangular trust regions for least-squares minimization.
4The description of the algorithm can be found in [Voglis]_. The algorithm does
5trust-region iterations, but the shape of trust regions is rectangular as
6opposed to conventional elliptical. The intersection of a trust region and
7an initial feasible region is again some rectangle. Thus, on each iteration a
8bound-constrained quadratic optimization problem is solved.
10A quadratic problem is solved by well-known dogleg approach, where the
11function is minimized along piecewise-linear "dogleg" path [NumOpt]_,
12Chapter 4. If Jacobian is not rank-deficient then the function is decreasing
13along this path, and optimization amounts to simply following along this
14path as long as a point stays within the bounds. A constrained Cauchy step
15(along the anti-gradient) is considered for safety in rank deficient cases,
16in this situations the convergence might be slow.
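
Schematically, writing p_c for the constrained Cauchy step and p_gn for the
Gauss-Newton step, the path followed here can be sketched as

    p(t) = p_c + t * (p_gn - p_c),    0 <= t <= 1,

with t taken as large as possible while p(t) stays inside the intersection
rectangle (this notation is illustrative and not taken from the references;
if p_gn itself is feasible it is used directly).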

If during the iterations some variable hits its initial bound and the
corresponding component of the anti-gradient points outside the feasible
region, then the next dogleg step won't make progress along that variable.
Such variables satisfy first-order optimality conditions and are excluded
before the next dogleg step is computed.

The Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for
dense Jacobian matrices) or approximately by the iterative procedure
`scipy.sparse.linalg.lsmr` (for dense and sparse matrices, or for a Jacobian
given as a LinearOperator). The second option allows solving very large
problems (up to a couple of million residuals on a regular PC), provided the
Jacobian matrix is sufficiently sparse. Note, however, that dogbox is not
very good for problems with a large number of constraints, because variables
are excluded and included again on each iteration (the required number of
function evaluations might be high, or the accuracy of the solution will be
poor); thus its large-scale usage is probably limited to unconstrained
problems.
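
A rough usage sketch (this solver is normally reached through
`scipy.optimize.least_squares`; the residual function below is made up
purely for illustration)::

    import numpy as np
    from scipy.optimize import least_squares

    def residuals(x):
        # First residual pulls x[0] towards 3 (outside the box below), the
        # second is a Rosenbrock-like coupling term.
        return np.array([x[0] - 3.0, 10.0 * (x[1] - x[0] ** 2)])

    res = least_squares(residuals, x0=np.array([0.0, 0.0]),
                        bounds=([-2.0, -2.0], [2.0, 2.0]),
                        method='dogbox')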

References
----------
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
            Approach for Unconstrained and Bound Constrained Nonlinear
            Optimization", WSEAS International Conference on Applied
            Mathematics, Corfu, Greece, 2004.
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, 2nd edition".
"""
import numpy as np
from numpy.linalg import lstsq, norm

from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr
from scipy.optimize import OptimizeResult

from .common import (
    step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,
    build_quadratic_1d, minimize_quadratic_1d, compute_grad,
    compute_jac_scale, check_termination, scale_for_robust_loss_function,
    print_header_nonlinear, print_iteration_nonlinear)


def lsmr_operator(Jop, d, active_set):
    """Compute LinearOperator to use in LSMR by dogbox algorithm.

    `active_set` mask is used to exclude active variables from computations
    of matrix-vector products.
    """
    m, n = Jop.shape

    def matvec(x):
        # Zero the active components so they do not contribute to the
        # product J @ (d * x).
        x_free = x.ravel().copy()
        x_free[active_set] = 0
        return Jop.matvec(x_free * d)

    def rmatvec(x):
        # Zero the components corresponding to active variables.
        r = d * Jop.rmatvec(x)
        r[active_set] = 0
        return r

    return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)
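

# A rough sketch of how the operator above is wired into `dogbox` below
# (illustrative only; `J`, `f`, `scale`, `active_set` and `free_set` are
# assumed to be defined as in the solver loop, and `tr_options` is omitted):
#
#     Jop = aslinearoperator(J)
#     lsmr_op = lsmr_operator(Jop, scale, active_set)
#     newton_step = -lsmr(lsmr_op, f)[0][free_set] * scale[free_set]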


def find_intersection(x, tr_bounds, lb, ub):
    """Find intersection of trust-region bounds and initial bounds.

    Returns
    -------
    lb_total, ub_total : ndarray with shape of x
        Lower and upper bounds of the intersection region.
    orig_l, orig_u : ndarray of bool with shape of x
        True means that an original bound is taken as a corresponding bound
        in the intersection region.
    tr_l, tr_u : ndarray of bool with shape of x
        True means that a trust-region bound is taken as a corresponding bound
        in the intersection region.
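
    Examples
    --------
    A minimal illustrative call (numbers chosen arbitrarily)::

        find_intersection(np.array([0.5]), np.array([1.0]),
                          np.array([0.0]), np.array([2.0]))

    Here the lower bound of the intersection comes from the original bound
    (``lb_total = [-0.5]``), while the upper bound comes from the trust
    region (``ub_total = [1.0]``).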
90 """
91 lb_centered = lb - x
92 ub_centered = ub - x
94 lb_total = np.maximum(lb_centered, -tr_bounds)
95 ub_total = np.minimum(ub_centered, tr_bounds)
97 orig_l = np.equal(lb_total, lb_centered)
98 orig_u = np.equal(ub_total, ub_centered)
100 tr_l = np.equal(lb_total, -tr_bounds)
101 tr_u = np.equal(ub_total, tr_bounds)
103 return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u


def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
    """Find dogleg step in a rectangular region.

    Returns
    -------
    step : ndarray, shape (n,)
        Computed dogleg step.
    bound_hits : ndarray of int, shape (n,)
        Each component shows whether a corresponding variable hits the
        initial bound after the step is taken:
            *  0 - a variable doesn't hit the bound.
            * -1 - lower bound is hit.
            *  1 - upper bound is hit.
    tr_hit : bool
        Whether the step hit the boundary of the trust region.
    """
    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
        x, tr_bounds, lb, ub
    )
    bound_hits = np.zeros_like(x, dtype=int)

    if in_bounds(newton_step, lb_total, ub_total):
        return newton_step, bound_hits, False

    to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)

    # The classical dogleg algorithm would check whether the Cauchy step fits
    # into the bounds, and just return its constrained version if not. But in
    # a rectangular trust region it makes sense to try to improve the
    # constrained Cauchy step too. Thus, we don't distinguish these two cases.

    cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g

    step_diff = newton_step - cauchy_step
    step_size, hits = step_size_to_bound(cauchy_step, step_diff,
                                         lb_total, ub_total)
    bound_hits[(hits < 0) & orig_l] = -1
    bound_hits[(hits > 0) & orig_u] = 1
    tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)

    return cauchy_step + step_size * step_diff, bound_hits, tr_hit


def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
           loss_function, tr_solver, tr_options, verbose):
    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        g_full = g.copy()
        g[active_set] = 0

        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_free = scale[free_set]

        # Compute the (Gauss-)Newton step and build a quadratic model
        # for the Cauchy step.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f, rcond=-1)[0]

            # Coefficients for the quadratic model along the anti-gradient.
            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # We compute the lsmr step in scaled variables and then transform
            # it back to the original variables. If lsmr gave the exact
            # least-squares solution, this would be equivalent to not doing
            # any transformations, but from experience the scaled formulation
            # works better.

            # We pass active_set to make computations as if we selected
            # the free subset of J columns, but without actually doing any
            # slicing, which is expensive for sparse matrices and impossible
            # for LinearOperator.

            lsmr_op = lsmr_operator(Jop, scale, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_free

            # Components of g for active variables were zeroed, so this call
            # is correct and equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(J_free, g_free,
                                                          step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            # gh11403: ensure that the solution is fully within bounds.
            x_new = np.clip(x + step, lb, ub)

            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale_inv, ord=np.inf)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, tr_hit
            )

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            on_bound[free_set] = on_bound_free

            x = x_new
            # Set variables exactly at the boundary.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
        active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)