1"""Functions used by least-squares algorithms."""
2from math import copysign
4import numpy as np
5from numpy.linalg import norm
7from scipy.linalg import cho_factor, cho_solve, LinAlgError
8from scipy.sparse import issparse
9from scipy.sparse.linalg import LinearOperator, aslinearoperator
12EPS = np.finfo(float).eps


# Functions related to a trust-region problem.


def intersect_trust_region(x, s, Delta):
    """Find the intersection of a line with the boundary of a trust region.

    This function solves the quadratic equation with respect to t
    ||(x + s*t)||**2 = Delta**2.

    Returns
    -------
    t_neg, t_pos : tuple of float
        Negative and positive roots.

    Raises
    ------
    ValueError
        If `s` is zero or `x` is not within the trust region.
    """
    a = np.dot(s, s)
    if a == 0:
        raise ValueError("`s` is zero.")

    b = np.dot(x, s)

    c = np.dot(x, x) - Delta**2
    if c > 0:
        raise ValueError("`x` is not within the trust region.")

    d = np.sqrt(b*b - a*c)  # Root from one fourth of the discriminant.

    # Computations below avoid loss of significance, see "Numerical Recipes".
    q = -(b + copysign(d, b))
    t1 = q / a
    t2 = c / q

    if t1 < t2:
        return t1, t2
    else:
        return t2, t1
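

# Illustrative usage sketch (added for clarity, not part of the original
# SciPy module; the `_example_*` helper and all numeric values below are
# hypothetical). The line x + t*s crosses a trust region of radius 2 at one
# negative and one positive t, since x lies strictly inside the region.
def _example_intersect_trust_region():
    x = np.array([1.0, 0.0])
    s = np.array([1.0, 1.0])
    t_neg, t_pos = intersect_trust_region(x, s, Delta=2.0)
    return t_neg, t_pos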


def solve_lsq_trust_region(n, m, uf, s, V, Delta, initial_alpha=None,
                           rtol=0.01, max_iter=10):
    """Solve a trust-region problem arising in least-squares minimization.

    This function implements a method described by J. J. More [1]_ and used
    in MINPACK, but it relies on a single SVD of the Jacobian instead of a
    series of Cholesky decompositions. Before running this function, compute:
    ``U, s, VT = svd(J, full_matrices=False)``.

    Parameters
    ----------
    n : int
        Number of variables.
    m : int
        Number of residuals.
    uf : ndarray
        Computed as U.T.dot(f).
    s : ndarray
        Singular values of J.
    V : ndarray
        Transpose of VT.
    Delta : float
        Radius of a trust region.
    initial_alpha : float, optional
        Initial guess for alpha, which might be available from a previous
        iteration. If None, determined automatically.
    rtol : float, optional
        Stopping tolerance for the root-finding procedure. Namely, the
        solution ``p`` will satisfy ``abs(norm(p) - Delta) < rtol * Delta``.
    max_iter : int, optional
        Maximum allowed number of iterations for the root-finding procedure.

    Returns
    -------
    p : ndarray, shape (n,)
        Found solution of a trust-region problem.
    alpha : float
        Positive value such that (J.T*J + alpha*I)*p = -J.T*f.
        Sometimes called the Levenberg-Marquardt parameter.
    n_iter : int
        Number of iterations made by the root-finding procedure. Zero means
        that the Gauss-Newton step was selected as the solution.

    References
    ----------
    .. [1] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
           and Theory," Numerical Analysis, ed. G. A. Watson, Lecture Notes
           in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    """
    def phi_and_derivative(alpha, suf, s, Delta):
        """Function of which to find zero.

        It is defined as "norm of regularized (by alpha) least-squares
        solution minus `Delta`". Refer to [1]_.
        """
        denom = s**2 + alpha
        p_norm = norm(suf / denom)
        phi = p_norm - Delta
        phi_prime = -np.sum(suf ** 2 / denom**3) / p_norm
        return phi, phi_prime

    suf = s * uf

    # Check if J has full rank and try Gauss-Newton step.
    if m >= n:
        threshold = EPS * m * s[0]
        full_rank = s[-1] > threshold
    else:
        full_rank = False

    if full_rank:
        p = -V.dot(uf / s)
        if norm(p) <= Delta:
            return p, 0.0, 0

    alpha_upper = norm(suf) / Delta

    if full_rank:
        phi, phi_prime = phi_and_derivative(0.0, suf, s, Delta)
        alpha_lower = -phi / phi_prime
    else:
        alpha_lower = 0.0

    if initial_alpha is None or not full_rank and initial_alpha == 0:
        alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)
    else:
        alpha = initial_alpha

    for it in range(max_iter):
        if alpha < alpha_lower or alpha > alpha_upper:
            alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)

        phi, phi_prime = phi_and_derivative(alpha, suf, s, Delta)

        if phi < 0:
            alpha_upper = alpha

        ratio = phi / phi_prime
        alpha_lower = max(alpha_lower, alpha - ratio)
        alpha -= (phi + Delta) * ratio / Delta

        if np.abs(phi) < rtol * Delta:
            break

    p = -V.dot(suf / (s**2 + alpha))

    # Make the norm of p equal to Delta; p is changed only slightly during
    # this. It is done to prevent p from lying outside the trust region
    # (which can cause problems later).
    p *= Delta / norm(p)

    return p, alpha, it + 1
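

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): the documented pre-computation of the SVD of J and of U.T.dot(f),
# followed by a call that returns a step whose norm is close to Delta.
def _example_solve_lsq_trust_region():
    rng = np.random.default_rng(0)
    J = rng.standard_normal((5, 3))
    f = rng.standard_normal(5)
    U, s, VT = np.linalg.svd(J, full_matrices=False)
    p, alpha, n_iter = solve_lsq_trust_region(3, 5, U.T.dot(f), s, VT.T,
                                              Delta=0.1)
    return p, alpha, n_iter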


def solve_trust_region_2d(B, g, Delta):
    """Solve a general trust-region problem in 2 dimensions.

    The problem is reformulated as a 4th order algebraic equation,
    the solution of which is found by numpy.roots.

    Parameters
    ----------
    B : ndarray, shape (2, 2)
        Symmetric matrix, defines a quadratic term of the function.
    g : ndarray, shape (2,)
        Defines a linear term of the function.
    Delta : float
        Radius of a trust region.

    Returns
    -------
    p : ndarray, shape (2,)
        Found solution.
    newton_step : bool
        Whether the returned solution is the Newton step which lies within
        the trust region.
    """
    try:
        R, lower = cho_factor(B)
        p = -cho_solve((R, lower), g)
        if np.dot(p, p) <= Delta**2:
            return p, True
    except LinAlgError:
        pass

    a = B[0, 0] * Delta**2
    b = B[0, 1] * Delta**2
    c = B[1, 1] * Delta**2

    d = g[0] * Delta
    f = g[1] * Delta

    coeffs = np.array(
        [-b + d, 2 * (a - c + f), 6 * b, 2 * (-a + c + f), -b - d])
    t = np.roots(coeffs)  # Can handle leading zeros.
    t = np.real(t[np.isreal(t)])

    p = Delta * np.vstack((2 * t / (1 + t**2), (1 - t**2) / (1 + t**2)))
    value = 0.5 * np.sum(p * B.dot(p), axis=0) + np.dot(g, p)
    i = np.argmin(value)
    p = p[:, i]

    return p, False
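

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): minimizing 0.5 * p.T B p + g.T p over a disc of radius 1. When
# the unconstrained Newton step already lies inside the disc, newton_step
# is True; otherwise the boundary solution is returned.
def _example_solve_trust_region_2d():
    B = np.array([[2.0, 0.5],
                  [0.5, 1.0]])
    g = np.array([-1.0, 1.0])
    p, newton_step = solve_trust_region_2d(B, g, Delta=1.0)
    return p, newton_step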


def update_tr_radius(Delta, actual_reduction, predicted_reduction,
                     step_norm, bound_hit):
    """Update the radius of a trust region based on the cost reduction.

    Returns
    -------
    Delta : float
        New radius.
    ratio : float
        Ratio between actual and predicted reductions.
    """
    if predicted_reduction > 0:
        ratio = actual_reduction / predicted_reduction
    elif predicted_reduction == actual_reduction == 0:
        ratio = 1
    else:
        ratio = 0

    if ratio < 0.25:
        Delta = 0.25 * step_norm
    elif ratio > 0.75 and bound_hit:
        Delta *= 2.0

    return Delta, ratio
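

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): a step that hit the trust-region boundary with a reduction ratio
# above 0.75 doubles the radius, while a ratio below 0.25 would shrink the
# radius to 0.25 * step_norm.
def _example_update_tr_radius():
    return update_tr_radius(Delta=1.0, actual_reduction=0.9,
                            predicted_reduction=1.0, step_norm=1.0,
                            bound_hit=True)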


# Construction and minimization of quadratic functions.


def build_quadratic_1d(J, g, s, diag=None, s0=None):
    """Parameterize a multivariate quadratic function along a line.

    The resulting univariate quadratic function is given as follows::

        f(t) = 0.5 * (s0 + s*t).T * (J.T*J + diag) * (s0 + s*t) +
               g.T * (s0 + s*t)

    Parameters
    ----------
    J : ndarray, sparse matrix or LinearOperator, shape (m, n)
        Jacobian matrix, affects the quadratic term.
    g : ndarray, shape (n,)
        Gradient, defines the linear term.
    s : ndarray, shape (n,)
        Direction vector of a line.
    diag : None or ndarray with shape (n,), optional
        Additional diagonal part, affects the quadratic term.
        If None, assumed to be 0.
    s0 : None or ndarray with shape (n,), optional
        Initial point. If None, assumed to be 0.

    Returns
    -------
    a : float
        Coefficient for t**2.
    b : float
        Coefficient for t.
    c : float
        Free term. Returned only if `s0` is provided.
    """
    v = J.dot(s)
    a = np.dot(v, v)
    if diag is not None:
        a += np.dot(s * diag, s)
    a *= 0.5

    b = np.dot(g, s)

    if s0 is not None:
        u = J.dot(s0)
        b += np.dot(u, v)
        c = 0.5 * np.dot(u, u) + np.dot(g, s0)
        if diag is not None:
            b += np.dot(s0 * diag, s)
            c += 0.5 * np.dot(s0 * diag, s0)
        return a, b, c
    else:
        return a, b
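

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): restricting the least-squares model to the line x = t*s gives a
# univariate quadratic a*t**2 + b*t (the free term c is returned only when
# an initial point s0 is passed).
def _example_build_quadratic_1d():
    J = np.array([[1.0, 0.0],
                  [0.0, 2.0]])
    g = J.T.dot(np.array([1.0, -1.0]))  # gradient J.T.dot(f) for residuals f
    s = np.array([1.0, 1.0])            # direction of the line
    a, b = build_quadratic_1d(J, g, s)
    return a, b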


def minimize_quadratic_1d(a, b, lb, ub, c=0):
    """Minimize a 1-D quadratic function subject to bounds.

    The free term `c` is 0 by default. Bounds must be finite.

    Returns
    -------
    t : float
        Minimum point.
    y : float
        Minimum value.
    """
    t = [lb, ub]
    if a != 0:
        extremum = -0.5 * b / a
        if lb < extremum < ub:
            t.append(extremum)
    t = np.asarray(t)
    y = t * (a * t + b) + c
    min_index = np.argmin(y)
    return t[min_index], y[min_index]
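

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): minimizing 2.5*t**2 - t on [0, 1]. The unconstrained minimum
# t = 0.2 lies inside the interval, so it is returned together with the
# value -0.1; otherwise the better of the two endpoints would be chosen.
def _example_minimize_quadratic_1d():
    return minimize_quadratic_1d(2.5, -1.0, 0.0, 1.0)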


def evaluate_quadratic(J, g, s, diag=None):
    """Compute values of a quadratic function arising in least squares.

    The function is 0.5 * s.T * (J.T * J + diag) * s + g.T * s.

    Parameters
    ----------
    J : ndarray, sparse matrix or LinearOperator, shape (m, n)
        Jacobian matrix, affects the quadratic term.
    g : ndarray, shape (n,)
        Gradient, defines the linear term.
    s : ndarray, shape (k, n) or (n,)
        Array containing steps as rows.
    diag : ndarray, shape (n,), optional
        Additional diagonal part, affects the quadratic term.
        If None, assumed to be 0.

    Returns
    -------
    values : ndarray with shape (k,) or float
        Values of the function. If `s` was 2-D, then an ndarray is
        returned, otherwise, a float is returned.
    """
    if s.ndim == 1:
        Js = J.dot(s)
        q = np.dot(Js, Js)
        if diag is not None:
            q += np.dot(s * diag, s)
    else:
        Js = J.dot(s.T)
        q = np.sum(Js**2, axis=0)
        if diag is not None:
            q += np.sum(diag * s**2, axis=1)

    l = np.dot(s, g)

    return 0.5 * q + l
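

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): evaluating the model 0.5 * s.T (J.T J) s + g.T s for two
# candidate steps at once by stacking them as rows.
def _example_evaluate_quadratic():
    J = np.array([[1.0, 0.0],
                  [0.0, 2.0]])
    g = np.array([1.0, -2.0])
    steps = np.array([[1.0, 1.0],
                      [0.5, 0.0]])
    return evaluate_quadratic(J, g, steps)  # one value per row of `steps`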


# Utility functions to work with bound constraints.


def in_bounds(x, lb, ub):
    """Check if a point lies within bounds."""
    return np.all((x >= lb) & (x <= ub))


def step_size_to_bound(x, s, lb, ub):
    """Compute the minimum step size required to reach a bound.

    The function computes a positive scalar t, such that x + s * t is on
    the bound.

    Returns
    -------
    step : float
        Computed step. Non-negative value.
    hits : ndarray of int with shape of x
        Each element indicates whether a corresponding variable reaches the
        bound:

        * 0 - the bound was not hit.
        * -1 - the lower bound was hit.
        * 1 - the upper bound was hit.
    """
    non_zero = np.nonzero(s)
    s_non_zero = s[non_zero]
    steps = np.empty_like(x)
    steps.fill(np.inf)
    with np.errstate(over='ignore'):
        steps[non_zero] = np.maximum((lb - x)[non_zero] / s_non_zero,
                                     (ub - x)[non_zero] / s_non_zero)
    min_step = np.min(steps)
    return min_step, np.equal(steps, min_step) * np.sign(s).astype(int)
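

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): moving from x along s, the second variable reaches its upper
# bound first, so step = 0.5 and hits = [0, 1].
def _example_step_size_to_bound():
    x = np.array([0.0, 0.5])
    s = np.array([1.0, 1.0])
    lb = np.array([-1.0, -1.0])
    ub = np.array([2.0, 1.0])
    return step_size_to_bound(x, s, lb, ub)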


def find_active_constraints(x, lb, ub, rtol=1e-10):
    """Determine which constraints are active at a given point.

    The threshold is computed using `rtol` and the absolute value of the
    closest bound.

    Returns
    -------
    active : ndarray of int with shape of x
        Each component shows whether the corresponding constraint is active:

        * 0 - a constraint is not active.
        * -1 - a lower bound is active.
        * 1 - an upper bound is active.
    """
    active = np.zeros_like(x, dtype=int)

    if rtol == 0:
        active[x <= lb] = -1
        active[x >= ub] = 1
        return active

    lower_dist = x - lb
    upper_dist = ub - x

    lower_threshold = rtol * np.maximum(1, np.abs(lb))
    upper_threshold = rtol * np.maximum(1, np.abs(ub))

    lower_active = (np.isfinite(lb) &
                    (lower_dist <= np.minimum(upper_dist, lower_threshold)))
    active[lower_active] = -1

    upper_active = (np.isfinite(ub) &
                    (upper_dist <= np.minimum(lower_dist, upper_threshold)))
    active[upper_active] = 1

    return active


def make_strictly_feasible(x, lb, ub, rstep=1e-10):
    """Shift a point to the interior of a feasible region.

    Each element of the returned vector is at least at a relative distance
    `rstep` from the closest bound. If ``rstep=0`` then `np.nextafter` is
    used.
    """
    x_new = x.copy()

    active = find_active_constraints(x, lb, ub, rstep)
    lower_mask = np.equal(active, -1)
    upper_mask = np.equal(active, 1)

    if rstep == 0:
        x_new[lower_mask] = np.nextafter(lb[lower_mask], ub[lower_mask])
        x_new[upper_mask] = np.nextafter(ub[upper_mask], lb[upper_mask])
    else:
        x_new[lower_mask] = (lb[lower_mask] +
                             rstep * np.maximum(1, np.abs(lb[lower_mask])))
        x_new[upper_mask] = (ub[upper_mask] -
                             rstep * np.maximum(1, np.abs(ub[upper_mask])))

    tight_bounds = (x_new < lb) | (x_new > ub)
    x_new[tight_bounds] = 0.5 * (lb[tight_bounds] + ub[tight_bounds])

    return x_new
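

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): the first component sits exactly on its lower bound and the
# second violates its upper bound; both are nudged into the interior while
# the already strictly feasible third component is left unchanged.
def _example_make_strictly_feasible():
    x = np.array([0.0, 5.0, 1.0])
    lb = np.array([0.0, -np.inf, 0.0])
    ub = np.array([1.0, 4.0, 2.0])
    return make_strictly_feasible(x, lb, ub, rstep=1e-10)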


def CL_scaling_vector(x, g, lb, ub):
    """Compute Coleman-Li scaling vector and its derivatives.

    Components of a vector v are defined as follows::

               | ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf
        v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf
               | 1,            otherwise

    According to this definition v[i] >= 0 for all i. It differs from the
    definition in paper [1]_ (eq. (2.2)), where the absolute value of v is
    used. Both definitions are equivalent down the line.
    Derivatives of v with respect to x take value 1, -1 or 0 depending on a
    case.

    Returns
    -------
    v : ndarray with shape of x
        Scaling vector.
    dv : ndarray with shape of x
        Derivatives of v[i] with respect to x[i], diagonal elements of v's
        Jacobian.

    References
    ----------
    .. [1] M.A. Branch, T.F. Coleman, and Y. Li, "A Subspace, Interior,
           and Conjugate Gradient Method for Large-Scale Bound-Constrained
           Minimization Problems," SIAM Journal on Scientific Computing,
           Vol. 21, Number 1, pp 1-23, 1999.
    """
    v = np.ones_like(x)
    dv = np.zeros_like(x)

    mask = (g < 0) & np.isfinite(ub)
    v[mask] = ub[mask] - x[mask]
    dv[mask] = -1

    mask = (g > 0) & np.isfinite(lb)
    v[mask] = x[mask] - lb[mask]
    dv[mask] = 1

    return v, dv
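

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): for a negative gradient component with a finite upper bound,
# v[i] is the distance to that bound (dv[i] = -1); for a positive gradient
# component with a finite lower bound, it is the distance to the lower
# bound (dv[i] = 1).
def _example_CL_scaling_vector():
    x = np.array([0.5, 0.5])
    g = np.array([1.0, -1.0])
    lb = np.array([0.0, 0.0])
    ub = np.array([np.inf, 1.0])
    return CL_scaling_vector(x, g, lb, ub)  # v = [0.5, 0.5], dv = [1, -1]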


def reflective_transformation(y, lb, ub):
    """Compute reflective transformation and its gradient."""
    if in_bounds(y, lb, ub):
        return y, np.ones_like(y)

    lb_finite = np.isfinite(lb)
    ub_finite = np.isfinite(ub)

    x = y.copy()
    g_negative = np.zeros_like(y, dtype=bool)

    mask = lb_finite & ~ub_finite
    x[mask] = np.maximum(y[mask], 2 * lb[mask] - y[mask])
    g_negative[mask] = y[mask] < lb[mask]

    mask = ~lb_finite & ub_finite
    x[mask] = np.minimum(y[mask], 2 * ub[mask] - y[mask])
    g_negative[mask] = y[mask] > ub[mask]

    mask = lb_finite & ub_finite
    d = ub - lb
    t = np.remainder(y[mask] - lb[mask], 2 * d[mask])
    x[mask] = lb[mask] + np.minimum(t, 2 * d[mask] - t)
    g_negative[mask] = t > d[mask]

    g = np.ones_like(y)
    g[g_negative] = -1

    return x, g
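

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): a point outside the box [0, 1] is folded back inside, and the
# gradient sign g = -1 records that the transformation is decreasing there.
def _example_reflective_transformation():
    lb = np.array([0.0])
    ub = np.array([1.0])
    y = np.array([1.3])
    return reflective_transformation(y, lb, ub)  # x = [0.7], g = [-1.0]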


# Functions to display algorithm's progress.


def print_header_nonlinear():
    print("{0:^15}{1:^15}{2:^15}{3:^15}{4:^15}{5:^15}"
          .format("Iteration", "Total nfev", "Cost", "Cost reduction",
                  "Step norm", "Optimality"))


def print_iteration_nonlinear(iteration, nfev, cost, cost_reduction,
                              step_norm, optimality):
    if cost_reduction is None:
        cost_reduction = " " * 15
    else:
        cost_reduction = "{0:^15.2e}".format(cost_reduction)

    if step_norm is None:
        step_norm = " " * 15
    else:
        step_norm = "{0:^15.2e}".format(step_norm)

    print("{0:^15}{1:^15}{2:^15.4e}{3}{4}{5:^15.2e}"
          .format(iteration, nfev, cost, cost_reduction,
                  step_norm, optimality))


def print_header_linear():
    print("{0:^15}{1:^15}{2:^15}{3:^15}{4:^15}"
          .format("Iteration", "Cost", "Cost reduction", "Step norm",
                  "Optimality"))


def print_iteration_linear(iteration, cost, cost_reduction, step_norm,
                           optimality):
    if cost_reduction is None:
        cost_reduction = " " * 15
    else:
        cost_reduction = "{0:^15.2e}".format(cost_reduction)

    if step_norm is None:
        step_norm = " " * 15
    else:
        step_norm = "{0:^15.2e}".format(step_norm)

    print("{0:^15}{1:^15.4e}{2}{3}{4:^15.2e}".format(
        iteration, cost, cost_reduction, step_norm, optimality))


# Simple helper functions.


def compute_grad(J, f):
    """Compute gradient of the least-squares cost function."""
    if isinstance(J, LinearOperator):
        return J.rmatvec(f)
    else:
        return J.T.dot(f)


def compute_jac_scale(J, scale_inv_old=None):
    """Compute variable scales based on the Jacobian matrix."""
    if issparse(J):
        scale_inv = np.asarray(J.power(2).sum(axis=0)).ravel()**0.5
    else:
        scale_inv = np.sum(J**2, axis=0)**0.5

    if scale_inv_old is None:
        scale_inv[scale_inv == 0] = 1
    else:
        scale_inv = np.maximum(scale_inv, scale_inv_old)

    return 1 / scale_inv, scale_inv


def left_multiplied_operator(J, d):
    """Return diag(d) J as LinearOperator."""
    J = aslinearoperator(J)

    def matvec(x):
        return d * J.matvec(x)

    def matmat(X):
        return d[:, np.newaxis] * J.matmat(X)

    def rmatvec(x):
        return J.rmatvec(x.ravel() * d)

    return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
                          rmatvec=rmatvec)


def right_multiplied_operator(J, d):
    """Return J diag(d) as LinearOperator."""
    J = aslinearoperator(J)

    def matvec(x):
        return J.matvec(np.ravel(x) * d)

    def matmat(X):
        return J.matmat(X * d[:, np.newaxis])

    def rmatvec(x):
        return d * J.rmatvec(x)

    return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
                          rmatvec=rmatvec)


def regularized_lsq_operator(J, diag):
    """Return a matrix arising in regularized least squares as LinearOperator.

    The matrix is
        [ J ]
        [ D ]
    where D is a diagonal matrix with elements from `diag`.
    """
    J = aslinearoperator(J)
    m, n = J.shape

    def matvec(x):
        return np.hstack((J.matvec(x), diag * x))

    def rmatvec(x):
        x1 = x[:m]
        x2 = x[m:]
        return J.rmatvec(x1) + diag * x2

    return LinearOperator((m + n, n), matvec=matvec, rmatvec=rmatvec)
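

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): the returned operator behaves like the stacked matrix [J; D], so
# its matvec is the concatenation of J.dot(x) and diag * x.
def _example_regularized_lsq_operator():
    J = np.array([[1.0, 0.0],
                  [0.0, 2.0],
                  [1.0, 1.0]])
    diag = np.array([0.1, 0.1])
    A = regularized_lsq_operator(J, diag)
    return A.matvec(np.array([1.0, 1.0]))  # shape (m + n,) = (5,)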


def right_multiply(J, d, copy=True):
    """Compute J diag(d).

    If `copy` is False, `J` is modified in place (unless it is a
    LinearOperator).
    """
    if copy and not isinstance(J, LinearOperator):
        J = J.copy()

    if issparse(J):
        J.data *= d.take(J.indices, mode='clip')  # scikit-learn recipe.
    elif isinstance(J, LinearOperator):
        J = right_multiplied_operator(J, d)
    else:
        J *= d

    return J


def left_multiply(J, d, copy=True):
    """Compute diag(d) J.

    If `copy` is False, `J` is modified in place (unless it is a
    LinearOperator).
    """
    if copy and not isinstance(J, LinearOperator):
        J = J.copy()

    if issparse(J):
        J.data *= np.repeat(d, np.diff(J.indptr))  # scikit-learn recipe.
    elif isinstance(J, LinearOperator):
        J = left_multiplied_operator(J, d)
    else:
        J *= d[:, np.newaxis]

    return J


def check_termination(dF, F, dx_norm, x_norm, ratio, ftol, xtol):
    """Check termination condition for nonlinear least squares."""
    ftol_satisfied = dF < ftol * F and ratio > 0.25
    xtol_satisfied = dx_norm < xtol * (xtol + x_norm)

    if ftol_satisfied and xtol_satisfied:
        return 4
    elif ftol_satisfied:
        return 2
    elif xtol_satisfied:
        return 3
    else:
        return None
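

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): a relative cost reduction below ftol with an acceptable step
# ratio satisfies the ftol condition alone, so status 2 is returned
# (3 for xtol only, 4 for both, None for neither).
def _example_check_termination():
    return check_termination(dF=1e-12, F=1.0, dx_norm=1e-3, x_norm=1.0,
                             ratio=0.5, ftol=1e-8, xtol=1e-8)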


def scale_for_robust_loss_function(J, f, rho):
    """Scale Jacobian and residuals for a robust loss function.

    Arrays are modified in place.
    """
    J_scale = rho[1] + 2 * rho[2] * f**2
    J_scale[J_scale < EPS] = EPS
    J_scale **= 0.5

    f *= rho[1] / J_scale

    return left_multiply(J, J_scale, copy=False), f