1"""Generic interface for least-squares minimization."""
2from warnings import warn
4import numpy as np
5from numpy.linalg import norm
7from scipy.sparse import issparse
8from scipy.sparse.linalg import LinearOperator
9from scipy.optimize import _minpack, OptimizeResult
10from scipy.optimize._numdiff import approx_derivative, group_columns
11from scipy.optimize._minimize import Bounds
13from .trf import trf
14from .dogbox import dogbox
15from .common import EPS, in_bounds, make_strictly_feasible


TERMINATION_MESSAGES = {
    -1: "Improper input parameters status returned from `leastsq`",
    0: "The maximum number of function evaluations is exceeded.",
    1: "`gtol` termination condition is satisfied.",
    2: "`ftol` termination condition is satisfied.",
    3: "`xtol` termination condition is satisfied.",
    4: "Both `ftol` and `xtol` termination conditions are satisfied."
}


FROM_MINPACK_TO_COMMON = {
    0: -1,  # Improper input parameters from MINPACK.
    1: 2,
    2: 3,
    3: 4,
    4: 1,
    5: 0
    # There are 6, 7, 8 for too small tolerance parameters,
    # but we guard against it by checking ftol, xtol, gtol beforehand.
}


def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step):
    n = x0.size

    if diff_step is None:
        epsfcn = EPS
    else:
        epsfcn = diff_step**2

    # Compute MINPACK's `diag`, which is inverse of our `x_scale` and
    # ``x_scale='jac'`` corresponds to ``diag=None``.
    if isinstance(x_scale, str) and x_scale == 'jac':
        diag = None
    else:
        diag = 1 / x_scale

    full_output = True
    col_deriv = False
    factor = 100.0

    if jac is None:
        if max_nfev is None:
            # n squared to account for Jacobian evaluations.
            max_nfev = 100 * n * (n + 1)
        x, info, status = _minpack._lmdif(
            fun, x0, (), full_output, ftol, xtol, gtol,
            max_nfev, epsfcn, factor, diag)
    else:
        if max_nfev is None:
            max_nfev = 100 * n
        x, info, status = _minpack._lmder(
            fun, jac, x0, (), full_output, col_deriv,
            ftol, xtol, gtol, max_nfev, factor, diag)

    f = info['fvec']

    if callable(jac):
        J = jac(x)
    else:
        J = np.atleast_2d(approx_derivative(fun, x))

    cost = 0.5 * np.dot(f, f)
    g = J.T.dot(f)
    g_norm = norm(g, ord=np.inf)

    nfev = info['nfev']
    njev = info.get('njev', None)

    status = FROM_MINPACK_TO_COMMON[status]
    active_mask = np.zeros_like(x0, dtype=int)

    return OptimizeResult(
        x=x, cost=cost, fun=f, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev, status=status)


def prepare_bounds(bounds, n):
    lb, ub = [np.asarray(b, dtype=float) for b in bounds]
    if lb.ndim == 0:
        lb = np.resize(lb, n)

    if ub.ndim == 0:
        ub = np.resize(ub, n)

    return lb, ub


def check_tolerance(ftol, xtol, gtol, method):
    def check(tol, name):
        if tol is None:
            tol = 0
        elif tol < EPS:
            warn("Setting `{}` below the machine epsilon ({:.2e}) effectively "
                 "disables the corresponding termination condition."
                 .format(name, EPS))
        return tol

    ftol = check(ftol, "ftol")
    xtol = check(xtol, "xtol")
    gtol = check(gtol, "gtol")

    if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS):
        raise ValueError("All tolerances must be higher than machine epsilon "
                         "({:.2e}) for method 'lm'.".format(EPS))
    elif ftol < EPS and xtol < EPS and gtol < EPS:
        raise ValueError("At least one of the tolerances must be higher than "
                         "machine epsilon ({:.2e}).".format(EPS))

    return ftol, xtol, gtol


def check_x_scale(x_scale, x0):
    if isinstance(x_scale, str) and x_scale == 'jac':
        return x_scale

    try:
        x_scale = np.asarray(x_scale, dtype=float)
        valid = np.all(np.isfinite(x_scale)) and np.all(x_scale > 0)
    except (ValueError, TypeError):
        valid = False

    if not valid:
        raise ValueError("`x_scale` must be 'jac' or array_like with "
                         "positive numbers.")

    if x_scale.ndim == 0:
        x_scale = np.resize(x_scale, x0.shape)

    if x_scale.shape != x0.shape:
        raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")

    return x_scale


def check_jac_sparsity(jac_sparsity, m, n):
    if jac_sparsity is None:
        return None

    if not issparse(jac_sparsity):
        jac_sparsity = np.atleast_2d(jac_sparsity)

    if jac_sparsity.shape != (m, n):
        raise ValueError("`jac_sparsity` has wrong shape.")

    return jac_sparsity, group_columns(jac_sparsity)


# Loss functions.


def huber(z, rho, cost_only):
    mask = z <= 1
    rho[0, mask] = z[mask]
    rho[0, ~mask] = 2 * z[~mask]**0.5 - 1
    if cost_only:
        return
    rho[1, mask] = 1
    rho[1, ~mask] = z[~mask]**-0.5
    rho[2, mask] = 0
    rho[2, ~mask] = -0.5 * z[~mask]**-1.5


def soft_l1(z, rho, cost_only):
    t = 1 + z
    rho[0] = 2 * (t**0.5 - 1)
    if cost_only:
        return
    rho[1] = t**-0.5
    rho[2] = -0.5 * t**-1.5


def cauchy(z, rho, cost_only):
    rho[0] = np.log1p(z)
    if cost_only:
        return
    t = 1 + z
    rho[1] = 1 / t
    rho[2] = -1 / t**2


def arctan(z, rho, cost_only):
    rho[0] = np.arctan(z)
    if cost_only:
        return
    t = 1 + z**2
    rho[1] = 1 / t
    rho[2] = -2 * z / t**2


IMPLEMENTED_LOSSES = dict(linear=None, huber=huber, soft_l1=soft_l1,
                          cauchy=cauchy, arctan=arctan)


def construct_loss_function(m, loss, f_scale):
    if loss == 'linear':
        return None

    if not callable(loss):
        loss = IMPLEMENTED_LOSSES[loss]
        rho = np.empty((3, m))

        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            loss(z, rho, cost_only=cost_only)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho
    else:
        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            rho = loss(z)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho

    return loss_function


def least_squares(
        fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf',
        ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear',
        f_scale=1.0, diff_step=None, tr_solver=None, tr_options={},
        jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={}):
    """Solve a nonlinear least-squares problem with bounds on the variables.

    Given the residuals f(x) (an m-D real function of n real
    variables) and the loss function rho(s) (a scalar function), `least_squares`
    finds a local minimum of the cost function F(x)::

        minimize F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1)
        subject to lb <= x <= ub

    The purpose of the loss function rho(s) is to reduce the influence of
    outliers on the solution.

    Parameters
    ----------
    fun : callable
        Function which computes the vector of residuals, with the signature
        ``fun(x, *args, **kwargs)``, i.e., the minimization proceeds with
        respect to its first argument. The argument ``x`` passed to this
        function is an ndarray of shape (n,) (never a scalar, even for n=1).
        It must allocate and return a 1-D array_like of shape (m,) or a scalar.
        If the argument ``x`` is complex or the function ``fun`` returns
        complex residuals, it must be wrapped in a real function of real
        arguments, as shown at the end of the Examples section.
    x0 : array_like with shape (n,) or float
        Initial guess on independent variables. If float, it will be treated
        as a 1-D array with one element.
    jac : {'2-point', '3-point', 'cs', callable}, optional
        Method of computing the Jacobian matrix (an m-by-n matrix, where
        element (i, j) is the partial derivative of f[i] with respect to
        x[j]). The keywords select a finite difference scheme for numerical
        estimation. The scheme '3-point' is more accurate, but requires
        twice as many operations as '2-point' (default). The scheme 'cs'
        uses complex steps, and while potentially the most accurate, it is
        applicable only when `fun` correctly handles complex inputs and
        can be analytically continued to the complex plane. Method 'lm'
        always uses the '2-point' scheme. If callable, it is used as
        ``jac(x, *args, **kwargs)`` and should return a good approximation
        (or the exact value) for the Jacobian as an array_like (np.atleast_2d
        is applied), a sparse matrix (csr_matrix preferred for performance) or
        a `scipy.sparse.linalg.LinearOperator`.
    bounds : 2-tuple of array_like or `Bounds`, optional
        There are two ways to specify bounds:

        1. Instance of `Bounds` class
        2. Lower and upper bounds on independent variables. Defaults to no
           bounds. Each array must match the size of `x0` or be a scalar,
           in the latter case a bound will be the same for all variables.
           Use ``np.inf`` with an appropriate sign to disable bounds on all
           or some variables.
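
        For instance, both of the following forms request ``0 <= x[0] <= 1``
        while leaving ``x[1]`` unconstrained (a brief illustration of the two
        equivalent ways of specifying bounds)::

            bounds=([0, -np.inf], [1, np.inf])
            bounds=Bounds(lb=[0, -np.inf], ub=[1, np.inf])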
    method : {'trf', 'dogbox', 'lm'}, optional
        Algorithm to perform minimization.

        * 'trf' : Trust Region Reflective algorithm, particularly suitable
          for large sparse problems with bounds. Generally robust method.
        * 'dogbox' : dogleg algorithm with rectangular trust regions,
          typical use case is small problems with bounds. Not recommended
          for problems with rank-deficient Jacobian.
        * 'lm' : Levenberg-Marquardt algorithm as implemented in MINPACK.
          Doesn't handle bounds and sparse Jacobians. Usually the most
          efficient method for small unconstrained problems.

        Default is 'trf'. See Notes for more information.
    ftol : float or None, optional
        Tolerance for termination by the change of the cost function. Default
        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
        and there was an adequate agreement between a local quadratic model and
        the true model in the last step.

        If None and 'method' is not 'lm', the termination by this condition is
        disabled. If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    xtol : float or None, optional
        Tolerance for termination by the change of the independent variables.
        Default is 1e-8. The exact condition depends on the `method` used:

        * For 'trf' and 'dogbox' : ``norm(dx) < xtol * (xtol + norm(x))``.
        * For 'lm' : ``Delta < xtol * norm(xs)``, where ``Delta`` is
          a trust-region radius and ``xs`` is the value of ``x``
          scaled according to the `x_scale` parameter (see below).

        If None and 'method' is not 'lm', the termination by this condition is
        disabled. If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    gtol : float or None, optional
        Tolerance for termination by the norm of the gradient. Default is 1e-8.
        The exact condition depends on the `method` used:

        * For 'trf' : ``norm(g_scaled, ord=np.inf) < gtol``, where
          ``g_scaled`` is the value of the gradient scaled to account for
          the presence of the bounds [STIR]_.
        * For 'dogbox' : ``norm(g_free, ord=np.inf) < gtol``, where
          ``g_free`` is the gradient with respect to the variables which
          are not in the optimal state on the boundary.
        * For 'lm' : the maximum absolute value of the cosine of angles
          between columns of the Jacobian and the residual vector is less
          than `gtol`, or the residual vector is zero.

        If None and 'method' is not 'lm', the termination by this condition is
        disabled. If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    x_scale : array_like or 'jac', optional
        Characteristic scale of each variable. Setting `x_scale` is equivalent
        to reformulating the problem in scaled variables ``xs = x / x_scale``.
        An alternative view is that the size of a trust region along the jth
        dimension is proportional to ``x_scale[j]``. Improved convergence may
        be achieved by setting `x_scale` such that a step of a given size
        along any of the scaled variables has a similar effect on the cost
        function. If set to 'jac', the scale is iteratively updated using the
        inverse norms of the columns of the Jacobian matrix (as described in
        [JJMore]_).
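
        As a brief illustration, if the first variable is expected to be of
        order ``1e3`` and the second of order ``1e-2``, one may pass either
        explicit scales or the adaptive option::

            x_scale=[1e3, 1e-2]   # characteristic magnitudes of x[0] and x[1]
            x_scale='jac'         # scales from inverse Jacobian column norms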
    loss : str or callable, optional
        Determines the loss function. The following keyword values are allowed:

        * 'linear' (default) : ``rho(z) = z``. Gives a standard
          least-squares problem.
        * 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
          approximation of l1 (absolute value) loss. Usually a good
          choice for robust least squares.
        * 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
          similarly to 'soft_l1'.
        * 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens the influence
          of outliers, but may cause difficulties in the optimization process.
        * 'arctan' : ``rho(z) = arctan(z)``. Limits the maximum loss on
          a single residual, and has properties similar to 'cauchy'.

        If callable, it must take a 1-D ndarray ``z=f**2`` and return an
        array_like with shape (3, m) where row 0 contains function values,
        row 1 contains first derivatives and row 2 contains second
        derivatives. Method 'lm' supports only 'linear' loss.
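
        As a sketch of a conforming callable, the built-in 'soft_l1' loss
        could equally well be supplied as::

            def soft_l1_loss(z):
                t = 1 + z
                # rows: rho(z), rho'(z), rho''(z), each of shape (m,)
                return np.vstack((2 * (t**0.5 - 1), t**-0.5, -0.5 * t**-1.5))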
    f_scale : float, optional
        Value of soft margin between inlier and outlier residuals, default
        is 1.0. The loss function is evaluated as follows
        ``rho_(f**2) = C**2 * rho(f**2 / C**2)``, where ``C`` is `f_scale`,
        and ``rho`` is determined by the `loss` parameter. This parameter has
        no effect with ``loss='linear'``, but for other `loss` values it is
        of crucial importance.
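
        Combining this with the cost definition above, the minimized cost
        becomes (with ``C = f_scale``)::

            0.5 * sum(C**2 * rho(f_i(x)**2 / C**2), i = 0, ..., m - 1)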
    max_nfev : None or int, optional
        Maximum number of function evaluations before the termination.
        If None (default), the value is chosen automatically:

        * For 'trf' and 'dogbox' : 100 * n.
        * For 'lm' : 100 * n if `jac` is callable and 100 * n * (n + 1)
          otherwise (because 'lm' counts function calls in Jacobian
          estimation).

    diff_step : None or array_like, optional
        Determines the relative step size for the finite difference
        approximation of the Jacobian. The actual step is computed as
        ``x * diff_step``. If None (default), then `diff_step` is taken to be
        a conventional "optimal" power of machine epsilon for the finite
        difference scheme used [NR]_.
    tr_solver : {None, 'exact', 'lsmr'}, optional
        Method for solving trust-region subproblems, relevant only for 'trf'
        and 'dogbox' methods.

        * 'exact' is suitable for not very large problems with dense
          Jacobian matrices. The computational complexity per iteration is
          comparable to a singular value decomposition of the Jacobian
          matrix.
        * 'lsmr' is suitable for problems with sparse and large Jacobian
          matrices. It uses the iterative procedure
          `scipy.sparse.linalg.lsmr` for finding a solution of a linear
          least-squares problem and only requires matrix-vector product
          evaluations.

        If None (default), the solver is chosen based on the type of Jacobian
        returned on the first iteration.
    tr_options : dict, optional
        Keyword options passed to the trust-region solver.

        * ``tr_solver='exact'``: `tr_options` are ignored.
        * ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`.
          Additionally, ``method='trf'`` supports the 'regularize' option
          (bool, default is True), which adds a regularization term to the
          normal equation, improving convergence if the Jacobian is
          rank-deficient [Byrd]_ (eq. 3.4).
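
        For example, with ``method='trf'`` and ``tr_solver='lsmr'`` that
        regularization term can be switched off by passing::

            tr_options={'regularize': False}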

    jac_sparsity : {None, array_like, sparse matrix}, optional
        Defines the sparsity structure of the Jacobian matrix for finite
        difference estimation, its shape must be (m, n). If the Jacobian has
        only a few non-zero elements in *each* row, providing the sparsity
        structure will greatly speed up the computations [Curtis]_. A zero
        entry means that a corresponding element in the Jacobian is identically
        zero. If provided, forces the use of the 'lsmr' trust-region solver.
        If None (default), then dense differencing will be used. Has no effect
        for the 'lm' method.
    verbose : {0, 1, 2}, optional
        Level of the algorithm's verbosity:

        * 0 (default) : work silently.
        * 1 : display a termination report.
        * 2 : display progress during iterations (not supported by the 'lm'
          method).

    args, kwargs : tuple and dict, optional
        Additional arguments passed to `fun` and `jac`. Both empty by default.
        The calling signature is ``fun(x, *args, **kwargs)`` and the same for
        `jac`.

    Returns
    -------
    result : OptimizeResult
        `OptimizeResult` with the following fields defined:

        x : ndarray, shape (n,)
            Solution found.
        cost : float
            Value of the cost function at the solution.
        fun : ndarray, shape (m,)
            Vector of residuals at the solution.
        jac : ndarray, sparse matrix or LinearOperator, shape (m, n)
            Modified Jacobian matrix at the solution, in the sense that J^T J
            is a Gauss-Newton approximation of the Hessian of the cost function.
            The type is the same as the one used by the algorithm.
        grad : ndarray, shape (n,)
            Gradient of the cost function at the solution.
        optimality : float
            First-order optimality measure. In unconstrained problems, it is
            always the uniform norm of the gradient. In constrained problems,
            it is the quantity which was compared with `gtol` during iterations.
        active_mask : ndarray of int, shape (n,)
            Each component shows whether a corresponding constraint is active
            (that is, whether a variable is at the bound):

            * 0 : a constraint is not active.
            * -1 : a lower bound is active.
            * 1 : an upper bound is active.

            Might be somewhat arbitrary for the 'trf' method as it generates a
            sequence of strictly feasible iterates and `active_mask` is
            determined within a tolerance threshold.
        nfev : int
            Number of function evaluations done. Methods 'trf' and 'dogbox' do
            not count function calls for numerical Jacobian approximation, as
            opposed to the 'lm' method.
        njev : int or None
            Number of Jacobian evaluations done. If numerical Jacobian
            approximation is used in the 'lm' method, it is set to None.
        status : int
            The reason for algorithm termination:

            * -1 : improper input parameters status returned from MINPACK.
            * 0 : the maximum number of function evaluations is exceeded.
            * 1 : `gtol` termination condition is satisfied.
            * 2 : `ftol` termination condition is satisfied.
            * 3 : `xtol` termination condition is satisfied.
            * 4 : both `ftol` and `xtol` termination conditions are satisfied.

        message : str
            Verbal description of the termination reason.
        success : bool
            True if one of the convergence criteria is satisfied (`status` > 0).

    See Also
    --------
    leastsq : A legacy wrapper for the MINPACK implementation of the
        Levenberg-Marquardt algorithm.
    curve_fit : Least-squares minimization applied to a curve-fitting problem.

    Notes
    -----
    Method 'lm' (Levenberg-Marquardt) calls a wrapper over least-squares
    algorithms implemented in MINPACK (lmder, lmdif). It runs the
    Levenberg-Marquardt algorithm formulated as a trust-region type algorithm.
    The implementation is based on the paper [JJMore]_; it is very robust and
    efficient with a lot of smart tricks. It should be your first choice
    for unconstrained problems. Note that it doesn't support bounds. Also,
    it doesn't work when m < n.

    Method 'trf' (Trust Region Reflective) is motivated by the process of
    solving a system of equations, which constitute the first-order optimality
    condition for a bound-constrained minimization problem as formulated in
    [STIR]_. The algorithm iteratively solves trust-region subproblems
    augmented by a special diagonal quadratic term and with trust-region shape
    determined by the distance from the bounds and the direction of the
    gradient. These enhancements help to avoid making steps directly into
    bounds and efficiently explore the whole space of variables. To further
    improve convergence, the algorithm considers search directions reflected
    from the bounds. To obey theoretical requirements, the algorithm keeps
    iterates strictly feasible. With dense Jacobians, trust-region subproblems
    are solved by an exact method very similar to the one described in
    [JJMore]_ (and implemented in MINPACK). The difference from the MINPACK
    implementation is that a singular value decomposition of the Jacobian
    matrix is done once per iteration, instead of a QR decomposition and a
    series of Givens rotation eliminations. For large sparse Jacobians, a 2-D
    subspace approach of solving trust-region subproblems is used [STIR]_,
    [Byrd]_. The subspace is spanned by a scaled gradient and an approximate
    Gauss-Newton solution delivered by `scipy.sparse.linalg.lsmr`. When no
    constraints are imposed, the algorithm is very similar to MINPACK and has
    generally comparable performance. The algorithm works quite robustly in
    unbounded and bounded problems, and is thus chosen as the default
    algorithm.

    Method 'dogbox' operates in a trust-region framework, but considers
    rectangular trust regions as opposed to conventional ellipsoids [Voglis]_.
    The intersection of a current trust region and initial bounds is again
    rectangular, so on each iteration a quadratic minimization problem subject
    to bound constraints is solved approximately by Powell's dogleg method
    [NumOpt]_. The required Gauss-Newton step can be computed exactly for
    dense Jacobians or approximately by `scipy.sparse.linalg.lsmr` for large
    sparse Jacobians. The algorithm is likely to exhibit slow convergence when
    the rank of the Jacobian is less than the number of variables. The
    algorithm often outperforms 'trf' in bounded problems with a small number
    of variables.

    Robust loss functions are implemented as described in [BA]_. The idea
    is to modify a residual vector and a Jacobian matrix on each iteration
    such that the computed gradient and Gauss-Newton Hessian approximation
    match the true gradient and Hessian approximation of the cost function.
    Then the algorithm proceeds in a normal way, i.e., robust loss functions
    are implemented as a simple wrapper over standard least-squares algorithms.

    .. versionadded:: 0.17.0

    References
    ----------
    .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
              and Conjugate Gradient Method for Large-Scale Bound-Constrained
              Minimization Problems," SIAM Journal on Scientific Computing,
              Vol. 21, Number 1, pp 1-23, 1999.
    .. [NR] William H. Press et. al., "Numerical Recipes. The Art of Scientific
            Computing. 3rd edition", Sec. 5.7.
    .. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate
              solution of the trust region problem by minimization over
              two-dimensional subspaces", Math. Programming, 40, pp. 247-263,
              1988.
    .. [Curtis] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
                sparse Jacobian matrices", Journal of the Institute of
                Mathematics and its Applications, 13, pp. 117-120, 1974.
    .. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation
                and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
                Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    .. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region
                Dogleg Approach for Unconstrained and Bound Constrained
                Nonlinear Optimization", WSEAS International Conference on
                Applied Mathematics, Corfu, Greece, 2004.
    .. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization,
                2nd edition", Chapter 4.
    .. [BA] B. Triggs et. al., "Bundle Adjustment - A Modern Synthesis",
            Proceedings of the International Workshop on Vision Algorithms:
            Theory and Practice, pp. 298-372, 1999.

    Examples
    --------
    In this example we find a minimum of the Rosenbrock function without bounds
    on independent variables.

    >>> import numpy as np
    >>> def fun_rosenbrock(x):
    ...     return np.array([10 * (x[1] - x[0]**2), (1 - x[0])])

    Notice that we only provide the vector of the residuals. The algorithm
    constructs the cost function as a sum of squares of the residuals, which
    gives the Rosenbrock function. The exact minimum is at ``x = [1.0, 1.0]``.

    >>> from scipy.optimize import least_squares
    >>> x0_rosenbrock = np.array([2, 2])
    >>> res_1 = least_squares(fun_rosenbrock, x0_rosenbrock)
    >>> res_1.x
    array([ 1.,  1.])
    >>> res_1.cost
    9.8669242910846867e-30
    >>> res_1.optimality
    8.8928864934219529e-14

    We now constrain the variables, in such a way that the previous solution
    becomes infeasible. Specifically, we require that ``x[1] >= 1.5``, and
    ``x[0]`` is left unconstrained. To this end, we specify the `bounds`
    parameter to `least_squares` in the form
    ``bounds=([-np.inf, 1.5], np.inf)``.

    We also provide the analytic Jacobian:

    >>> def jac_rosenbrock(x):
    ...     return np.array([
    ...         [-20 * x[0], 10],
    ...         [-1, 0]])

    Putting this all together, we see that the new solution lies on the bound:

    >>> res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock,
    ...                       bounds=([-np.inf, 1.5], np.inf))
    >>> res_2.x
    array([ 1.22437075,  1.5       ])
    >>> res_2.cost
    0.025213093946805685
    >>> res_2.optimality
    1.5885401433157753e-07

    Now we solve a system of equations (i.e., the cost function should be zero
    at a minimum) for a Broyden tridiagonal vector-valued function of 100000
    variables:

    >>> def fun_broyden(x):
    ...     f = (3 - x) * x + 1
    ...     f[1:] -= x[:-1]
    ...     f[:-1] -= 2 * x[1:]
    ...     return f

    The corresponding Jacobian matrix is sparse. We tell the algorithm to
    estimate it by finite differences and provide the sparsity structure of
    the Jacobian to significantly speed up this process.

    >>> from scipy.sparse import lil_matrix
    >>> def sparsity_broyden(n):
    ...     sparsity = lil_matrix((n, n), dtype=int)
    ...     i = np.arange(n)
    ...     sparsity[i, i] = 1
    ...     i = np.arange(1, n)
    ...     sparsity[i, i - 1] = 1
    ...     i = np.arange(n - 1)
    ...     sparsity[i, i + 1] = 1
    ...     return sparsity
    ...
    >>> n = 100000
    >>> x0_broyden = -np.ones(n)
    ...
    >>> res_3 = least_squares(fun_broyden, x0_broyden,
    ...                       jac_sparsity=sparsity_broyden(n))
    >>> res_3.cost
    4.5687069299604613e-23
    >>> res_3.optimality
    1.1650454296851518e-11

    Let's also solve a curve fitting problem using a robust loss function to
    take care of outliers in the data. Define the model function as
    ``y = a + b * exp(c * t)``, where t is a predictor variable, y is an
    observation and a, b, c are parameters to estimate.

    First, define the function which generates the data with noise and
    outliers, define the model parameters, and generate data:

    >>> from numpy.random import default_rng
    >>> rng = default_rng()
    >>> def gen_data(t, a, b, c, noise=0., n_outliers=0, seed=None):
    ...     rng = default_rng(seed)
    ...
    ...     y = a + b * np.exp(t * c)
    ...
    ...     error = noise * rng.standard_normal(t.size)
    ...     outliers = rng.integers(0, t.size, n_outliers)
    ...     error[outliers] *= 10
    ...
    ...     return y + error
    ...
    >>> a = 0.5
    >>> b = 2.0
    >>> c = -1
    >>> t_min = 0
    >>> t_max = 10
    >>> n_points = 15
    ...
    >>> t_train = np.linspace(t_min, t_max, n_points)
    >>> y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

    Define the function for computing residuals and the initial estimate of
    the parameters.

    >>> def fun(x, t, y):
    ...     return x[0] + x[1] * np.exp(x[2] * t) - y
    ...
    >>> x0 = np.array([1.0, 1.0, 0.0])

    Compute a standard least-squares solution:

    >>> res_lsq = least_squares(fun, x0, args=(t_train, y_train))

    Now compute two solutions with two different robust loss functions. The
    parameter `f_scale` is set to 0.1, meaning that inlier residuals should
    not significantly exceed 0.1 (the noise level used).

    >>> res_soft_l1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1,
    ...                             args=(t_train, y_train))
    >>> res_log = least_squares(fun, x0, loss='cauchy', f_scale=0.1,
    ...                         args=(t_train, y_train))

    And, finally, plot all the curves. We see that by selecting an appropriate
    `loss` we can get estimates close to optimal even in the presence of
    strong outliers. But keep in mind that generally it is recommended to try
    'soft_l1' or 'huber' losses first (if at all necessary) as the other two
    options may cause difficulties in the optimization process.

    >>> t_test = np.linspace(t_min, t_max, n_points * 10)
    >>> y_true = gen_data(t_test, a, b, c)
    >>> y_lsq = gen_data(t_test, *res_lsq.x)
    >>> y_soft_l1 = gen_data(t_test, *res_soft_l1.x)
    >>> y_log = gen_data(t_test, *res_log.x)
    ...
    >>> import matplotlib.pyplot as plt
    >>> plt.plot(t_train, y_train, 'o')
    >>> plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
    >>> plt.plot(t_test, y_lsq, label='linear loss')
    >>> plt.plot(t_test, y_soft_l1, label='soft_l1 loss')
    >>> plt.plot(t_test, y_log, label='cauchy loss')
    >>> plt.xlabel("t")
    >>> plt.ylabel("y")
    >>> plt.legend()
    >>> plt.show()

    In the next example, we show how complex-valued residual functions of
    complex variables can be optimized with ``least_squares()``. Consider the
    following function:

    >>> def f(z):
    ...     return z - (0.5 + 0.5j)

    We wrap it into a function of real variables that returns real residuals
    by simply handling the real and imaginary parts as independent variables:

    >>> def f_wrap(x):
    ...     fx = f(x[0] + 1j*x[1])
    ...     return np.array([fx.real, fx.imag])

    Thus, instead of the original m-D complex function of n complex
    variables we optimize a 2m-D real function of 2n real variables:

    >>> from scipy.optimize import least_squares
    >>> res_wrapped = least_squares(f_wrap, (0.1, 0.1), bounds=([0, 0], [1, 1]))
    >>> z = res_wrapped.x[0] + res_wrapped.x[1]*1j
    >>> z
    (0.49999999999925893+0.49999999999925893j)
    """
    if method not in ['trf', 'dogbox', 'lm']:
        raise ValueError("`method` must be 'trf', 'dogbox' or 'lm'.")

    if jac not in ['2-point', '3-point', 'cs'] and not callable(jac):
        raise ValueError("`jac` must be '2-point', '3-point', 'cs' or "
                         "callable.")

    if tr_solver not in [None, 'exact', 'lsmr']:
        raise ValueError("`tr_solver` must be None, 'exact' or 'lsmr'.")

    if loss not in IMPLEMENTED_LOSSES and not callable(loss):
        raise ValueError("`loss` must be one of {0} or a callable."
                         .format(IMPLEMENTED_LOSSES.keys()))

    if method == 'lm' and loss != 'linear':
        raise ValueError("method='lm' supports only 'linear' loss function.")

    if verbose not in [0, 1, 2]:
        raise ValueError("`verbose` must be in [0, 1, 2].")

    if max_nfev is not None and max_nfev <= 0:
        raise ValueError("`max_nfev` must be None or positive integer.")

    if np.iscomplexobj(x0):
        raise ValueError("`x0` must be real.")

    x0 = np.atleast_1d(x0).astype(float)

    if x0.ndim > 1:
        raise ValueError("`x0` must have at most 1 dimension.")

    if isinstance(bounds, Bounds):
        lb, ub = bounds.lb, bounds.ub
        bounds = (lb, ub)
    else:
        if len(bounds) == 2:
            lb, ub = prepare_bounds(bounds, x0.shape[0])
        else:
            raise ValueError("`bounds` must contain 2 elements.")

    if method == 'lm' and not np.all((lb == -np.inf) & (ub == np.inf)):
        raise ValueError("Method 'lm' doesn't support bounds.")

    if lb.shape != x0.shape or ub.shape != x0.shape:
        raise ValueError("Inconsistent shapes between bounds and `x0`.")

    if np.any(lb >= ub):
        raise ValueError("Each lower bound must be strictly less than each "
                         "upper bound.")

    if not in_bounds(x0, lb, ub):
        raise ValueError("`x0` is infeasible.")

    x_scale = check_x_scale(x_scale, x0)

    ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method)

    def fun_wrapped(x):
        return np.atleast_1d(fun(x, *args, **kwargs))

    if method == 'trf':
        x0 = make_strictly_feasible(x0, lb, ub)

    f0 = fun_wrapped(x0)

    if f0.ndim != 1:
        raise ValueError("`fun` must return at most 1-d array_like. "
                         "f0.shape: {0}".format(f0.shape))

    if not np.all(np.isfinite(f0)):
        raise ValueError("Residuals are not finite in the initial point.")

    n = x0.size
    m = f0.size

    if method == 'lm' and m < n:
        raise ValueError("Method 'lm' doesn't work when the number of "
                         "residuals is less than the number of variables.")

    loss_function = construct_loss_function(m, loss, f_scale)
    if callable(loss):
        rho = loss_function(f0)
        if rho.shape != (3, m):
            raise ValueError("The return value of `loss` callable has wrong "
                             "shape.")
        initial_cost = 0.5 * np.sum(rho[0])
    elif loss_function is not None:
        initial_cost = loss_function(f0, cost_only=True)
    else:
        initial_cost = 0.5 * np.dot(f0, f0)

    if callable(jac):
        J0 = jac(x0, *args, **kwargs)

        if issparse(J0):
            J0 = J0.tocsr()

            def jac_wrapped(x, _=None):
                return jac(x, *args, **kwargs).tocsr()

        elif isinstance(J0, LinearOperator):
            def jac_wrapped(x, _=None):
                return jac(x, *args, **kwargs)

        else:
            J0 = np.atleast_2d(J0)

            def jac_wrapped(x, _=None):
                return np.atleast_2d(jac(x, *args, **kwargs))

    else:  # Estimate Jacobian by finite differences.
        if method == 'lm':
            if jac_sparsity is not None:
                raise ValueError("method='lm' does not support "
                                 "`jac_sparsity`.")

            if jac != '2-point':
                warn("jac='{0}' works equivalently to '2-point' "
                     "for method='lm'.".format(jac))

            J0 = jac_wrapped = None
        else:
            if jac_sparsity is not None and tr_solver == 'exact':
                raise ValueError("tr_solver='exact' is incompatible "
                                 "with `jac_sparsity`.")

            jac_sparsity = check_jac_sparsity(jac_sparsity, m, n)

            def jac_wrapped(x, f):
                J = approx_derivative(fun, x, rel_step=diff_step, method=jac,
                                      f0=f, bounds=bounds, args=args,
                                      kwargs=kwargs, sparsity=jac_sparsity)
                if J.ndim != 2:  # J is guaranteed not sparse.
                    J = np.atleast_2d(J)

                return J

            J0 = jac_wrapped(x0, f0)

    if J0 is not None:
        if J0.shape != (m, n):
            raise ValueError(
                "The return value of `jac` has wrong shape: expected {0}, "
                "actual {1}.".format((m, n), J0.shape))

        if not isinstance(J0, np.ndarray):
            if method == 'lm':
                raise ValueError("method='lm' works only with dense "
                                 "Jacobian matrices.")

            if tr_solver == 'exact':
                raise ValueError(
                    "tr_solver='exact' works only with dense "
                    "Jacobian matrices.")

        jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
        if isinstance(J0, LinearOperator) and jac_scale:
            raise ValueError("x_scale='jac' can't be used when `jac` "
                             "returns LinearOperator.")

        if tr_solver is None:
            if isinstance(J0, np.ndarray):
                tr_solver = 'exact'
            else:
                tr_solver = 'lsmr'

    if method == 'lm':
        result = call_minpack(fun_wrapped, x0, jac_wrapped, ftol, xtol, gtol,
                              max_nfev, x_scale, diff_step)

    elif method == 'trf':
        result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol,
                     gtol, max_nfev, x_scale, loss_function, tr_solver,
                     tr_options.copy(), verbose)

    elif method == 'dogbox':
        if tr_solver == 'lsmr' and 'regularize' in tr_options:
            warn("The keyword 'regularize' in `tr_options` is not relevant "
                 "for 'dogbox' method.")
            tr_options = tr_options.copy()
            del tr_options['regularize']

        result = dogbox(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol,
                        xtol, gtol, max_nfev, x_scale, loss_function,
                        tr_solver, tr_options, verbose)

    result.message = TERMINATION_MESSAGES[result.status]
    result.success = result.status > 0

    if verbose >= 1:
        print(result.message)
        print("Function evaluations {0}, initial cost {1:.4e}, final cost "
              "{2:.4e}, first-order optimality {3:.2e}."
              .format(result.nfev, initial_cost, result.cost,
                      result.optimality))

    return result