1"""Generic interface for least-squares minimization."""
2from warnings import warn
4import numpy as np
5from numpy.linalg import norm
7from scipy.sparse import issparse
8from scipy.sparse.linalg import LinearOperator
9from scipy.optimize import _minpack, OptimizeResult
10from scipy.optimize._numdiff import approx_derivative, group_columns
11from scipy.optimize._minimize import Bounds
13from .trf import trf
14from .dogbox import dogbox
15from .common import EPS, in_bounds, make_strictly_feasible


TERMINATION_MESSAGES = {
    -1: "Improper input parameters status returned from `leastsq`",
    0: "The maximum number of function evaluations is exceeded.",
    1: "`gtol` termination condition is satisfied.",
    2: "`ftol` termination condition is satisfied.",
    3: "`xtol` termination condition is satisfied.",
    4: "Both `ftol` and `xtol` termination conditions are satisfied."
}


FROM_MINPACK_TO_COMMON = {
    0: -1,  # Improper input parameters from MINPACK.
    1: 2,
    2: 3,
    3: 4,
    4: 1,
    5: 0
    # There are 6, 7, 8 for too small tolerance parameters,
    # but we guard against it by checking ftol, xtol, gtol beforehand.
}


def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step):
    n = x0.size

    if diff_step is None:
        epsfcn = EPS
    else:
        epsfcn = diff_step**2

    # Compute MINPACK's `diag`, which is inverse of our `x_scale` and
    # ``x_scale='jac'`` corresponds to ``diag=None``.
    if isinstance(x_scale, str) and x_scale == 'jac':
        diag = None
    else:
        diag = 1 / x_scale

    full_output = True
    col_deriv = False
    factor = 100.0

    if jac is None:
        if max_nfev is None:
            # n squared to account for Jacobian evaluations.
            max_nfev = 100 * n * (n + 1)
        x, info, status = _minpack._lmdif(
            fun, x0, (), full_output, ftol, xtol, gtol,
            max_nfev, epsfcn, factor, diag)
    else:
        if max_nfev is None:
            max_nfev = 100 * n
        x, info, status = _minpack._lmder(
            fun, jac, x0, (), full_output, col_deriv,
            ftol, xtol, gtol, max_nfev, factor, diag)

    f = info['fvec']

    if callable(jac):
        J = jac(x)
    else:
        J = np.atleast_2d(approx_derivative(fun, x))

    cost = 0.5 * np.dot(f, f)
    g = J.T.dot(f)
    g_norm = norm(g, ord=np.inf)

    nfev = info['nfev']
    njev = info.get('njev', None)

    status = FROM_MINPACK_TO_COMMON[status]
    active_mask = np.zeros_like(x0, dtype=int)

    return OptimizeResult(
        x=x, cost=cost, fun=f, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev, status=status)


def prepare_bounds(bounds, n):
    lb, ub = [np.asarray(b, dtype=float) for b in bounds]
    if lb.ndim == 0:
        lb = np.resize(lb, n)

    if ub.ndim == 0:
        ub = np.resize(ub, n)

    return lb, ub


def check_tolerance(ftol, xtol, gtol, method):
    def check(tol, name):
        if tol is None:
            tol = 0
        elif tol < EPS:
            warn("Setting `{}` below the machine epsilon ({:.2e}) effectively "
                 "disables the corresponding termination condition."
                 .format(name, EPS))
        return tol

    ftol = check(ftol, "ftol")
    xtol = check(xtol, "xtol")
    gtol = check(gtol, "gtol")

    if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS):
        raise ValueError("All tolerances must be higher than machine epsilon "
                         "({:.2e}) for method 'lm'.".format(EPS))
    elif ftol < EPS and xtol < EPS and gtol < EPS:
        raise ValueError("At least one of the tolerances must be higher than "
                         "machine epsilon ({:.2e}).".format(EPS))

    return ftol, xtol, gtol


def check_x_scale(x_scale, x0):
    if isinstance(x_scale, str) and x_scale == 'jac':
        return x_scale

    try:
        x_scale = np.asarray(x_scale, dtype=float)
        valid = np.all(np.isfinite(x_scale)) and np.all(x_scale > 0)
    except (ValueError, TypeError):
        valid = False

    if not valid:
        raise ValueError("`x_scale` must be 'jac' or array_like with "
                         "positive numbers.")

    if x_scale.ndim == 0:
        x_scale = np.resize(x_scale, x0.shape)

    if x_scale.shape != x0.shape:
        raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")

    return x_scale


def check_jac_sparsity(jac_sparsity, m, n):
    if jac_sparsity is None:
        return None

    if not issparse(jac_sparsity):
        jac_sparsity = np.atleast_2d(jac_sparsity)

    if jac_sparsity.shape != (m, n):
        raise ValueError("`jac_sparsity` has wrong shape.")

    return jac_sparsity, group_columns(jac_sparsity)


# Loss functions.


def huber(z, rho, cost_only):
    mask = z <= 1
    rho[0, mask] = z[mask]
    rho[0, ~mask] = 2 * z[~mask]**0.5 - 1
    if cost_only:
        return
    rho[1, mask] = 1
    rho[1, ~mask] = z[~mask]**-0.5
    rho[2, mask] = 0
    rho[2, ~mask] = -0.5 * z[~mask]**-1.5


def soft_l1(z, rho, cost_only):
    t = 1 + z
    rho[0] = 2 * (t**0.5 - 1)
    if cost_only:
        return
    rho[1] = t**-0.5
    rho[2] = -0.5 * t**-1.5


def cauchy(z, rho, cost_only):
    rho[0] = np.log1p(z)
    if cost_only:
        return
    t = 1 + z
    rho[1] = 1 / t
    rho[2] = -1 / t**2


def arctan(z, rho, cost_only):
    rho[0] = np.arctan(z)
    if cost_only:
        return
    t = 1 + z**2
    rho[1] = 1 / t
    rho[2] = -2 * z / t**2


IMPLEMENTED_LOSSES = dict(linear=None, huber=huber, soft_l1=soft_l1,
                          cauchy=cauchy, arctan=arctan)


def construct_loss_function(m, loss, f_scale):
    if loss == 'linear':
        return None

    if not callable(loss):
        loss = IMPLEMENTED_LOSSES[loss]
        rho = np.empty((3, m))

        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            loss(z, rho, cost_only=cost_only)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho
    else:
        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            rho = loss(z)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho

    return loss_function


def least_squares(
        fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf',
        ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear',
        f_scale=1.0, diff_step=None, tr_solver=None, tr_options={},
        jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={}):
    """Solve a nonlinear least-squares problem with bounds on the variables.

    Given the residuals f(x) (an m-D real function of n real
    variables) and the loss function rho(s) (a scalar function), `least_squares`
    finds a local minimum of the cost function F(x)::

        minimize F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1)
        subject to lb <= x <= ub

    The purpose of the loss function rho(s) is to reduce the influence of
    outliers on the solution.

    Parameters
    ----------
    fun : callable
        Function which computes the vector of residuals, with the signature
        ``fun(x, *args, **kwargs)``, i.e., the minimization proceeds with
        respect to its first argument. The argument ``x`` passed to this
        function is an ndarray of shape (n,) (never a scalar, even for n=1).
        It must allocate and return a 1-D array_like of shape (m,) or a scalar.
        If the argument ``x`` is complex or the function ``fun`` returns
        complex residuals, it must be wrapped in a real function of real
        arguments, as shown at the end of the Examples section.
    x0 : array_like with shape (n,) or float
        Initial guess on independent variables. If float, it will be treated
        as a 1-D array with one element.
    jac : {'2-point', '3-point', 'cs', callable}, optional
        Method of computing the Jacobian matrix (an m-by-n matrix, where
        element (i, j) is the partial derivative of f[i] with respect to
        x[j]). The keywords select a finite difference scheme for numerical
        estimation. The scheme '3-point' is more accurate, but requires
        twice as many operations as '2-point' (default). The scheme 'cs'
        uses complex steps, and while potentially the most accurate, it is
        applicable only when `fun` correctly handles complex inputs and
        can be analytically continued to the complex plane. Method 'lm'
        always uses the '2-point' scheme. If callable, it is used as
        ``jac(x, *args, **kwargs)`` and should return a good approximation
        (or the exact value) for the Jacobian as an array_like (np.atleast_2d
        is applied), a sparse matrix (csr_matrix preferred for performance) or
        a `scipy.sparse.linalg.LinearOperator`.
    bounds : 2-tuple of array_like or `Bounds`, optional
        There are two ways to specify bounds:

        1. Instance of `Bounds` class
        2. Lower and upper bounds on independent variables. Defaults to no
           bounds. Each array must match the size of `x0` or be a scalar,
           in the latter case a bound will be the same for all variables.
           Use ``np.inf`` with an appropriate sign to disable bounds on all
           or some variables.
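
        For instance, both of the following forms request ``0 <= x[0] <= 1``
        while leaving ``x[1]`` unconstrained (a brief illustration of the two
        equivalent ways of specifying bounds)::

            bounds=([0, -np.inf], [1, np.inf])
            bounds=Bounds(lb=[0, -np.inf], ub=[1, np.inf])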
    method : {'trf', 'dogbox', 'lm'}, optional
        Algorithm to perform minimization.

        * 'trf' : Trust Region Reflective algorithm, particularly suitable
          for large sparse problems with bounds. Generally robust method.
        * 'dogbox' : dogleg algorithm with rectangular trust regions,
          typical use case is small problems with bounds. Not recommended
          for problems with rank-deficient Jacobian.
        * 'lm' : Levenberg-Marquardt algorithm as implemented in MINPACK.
          Doesn't handle bounds and sparse Jacobians. Usually the most
          efficient method for small unconstrained problems.

        Default is 'trf'. See Notes for more information.
    ftol : float or None, optional
        Tolerance for termination by the change of the cost function. Default
        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
        and there was an adequate agreement between a local quadratic model and
        the true model in the last step.

        If None and 'method' is not 'lm', the termination by this condition is
        disabled. If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    xtol : float or None, optional
        Tolerance for termination by the change of the independent variables.
        Default is 1e-8. The exact condition depends on the `method` used:

        * For 'trf' and 'dogbox' : ``norm(dx) < xtol * (xtol + norm(x))``.
        * For 'lm' : ``Delta < xtol * norm(xs)``, where ``Delta`` is
          a trust-region radius and ``xs`` is the value of ``x``
          scaled according to the `x_scale` parameter (see below).

        If None and 'method' is not 'lm', the termination by this condition is
        disabled. If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    gtol : float or None, optional
        Tolerance for termination by the norm of the gradient. Default is 1e-8.
        The exact condition depends on the `method` used:

        * For 'trf' : ``norm(g_scaled, ord=np.inf) < gtol``, where
          ``g_scaled`` is the value of the gradient scaled to account for
          the presence of the bounds [STIR]_.
        * For 'dogbox' : ``norm(g_free, ord=np.inf) < gtol``, where
          ``g_free`` is the gradient with respect to the variables which
          are not in the optimal state on the boundary.
        * For 'lm' : the maximum absolute value of the cosine of angles
          between columns of the Jacobian and the residual vector is less
          than `gtol`, or the residual vector is zero.

        If None and 'method' is not 'lm', the termination by this condition is
        disabled. If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    x_scale : array_like or 'jac', optional
        Characteristic scale of each variable. Setting `x_scale` is equivalent
        to reformulating the problem in scaled variables ``xs = x / x_scale``.
        An alternative view is that the size of a trust region along the jth
        dimension is proportional to ``x_scale[j]``. Improved convergence may
        be achieved by setting `x_scale` such that a step of a given size
        along any of the scaled variables has a similar effect on the cost
        function. If set to 'jac', the scale is iteratively updated using the
        inverse norms of the columns of the Jacobian matrix (as described in
        [JJMore]_).
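
        As a brief illustration, if the first variable is expected to be of
        order ``1e3`` and the second of order ``1e-2``, one may pass either
        explicit scales or the adaptive option::

            x_scale=[1e3, 1e-2]   # characteristic magnitudes of x[0] and x[1]
            x_scale='jac'         # scales from inverse Jacobian column norms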
    loss : str or callable, optional
        Determines the loss function. The following keyword values are allowed:

        * 'linear' (default) : ``rho(z) = z``. Gives a standard
          least-squares problem.
        * 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
          approximation of l1 (absolute value) loss. Usually a good
          choice for robust least squares.
        * 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
          similarly to 'soft_l1'.
        * 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens the influence
          of outliers, but may cause difficulties in the optimization process.
        * 'arctan' : ``rho(z) = arctan(z)``. Limits the maximum loss on
          a single residual, and has properties similar to 'cauchy'.

        If callable, it must take a 1-D ndarray ``z=f**2`` and return an
        array_like with shape (3, m) where row 0 contains function values,
        row 1 contains first derivatives and row 2 contains second
        derivatives. Method 'lm' supports only 'linear' loss.
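
        As a sketch of a conforming callable, the built-in 'soft_l1' loss
        could equally well be supplied as::

            def soft_l1_loss(z):
                t = 1 + z
                # rows: rho(z), rho'(z), rho''(z), each of shape (m,)
                return np.vstack((2 * (t**0.5 - 1), t**-0.5, -0.5 * t**-1.5))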
    f_scale : float, optional
        Value of soft margin between inlier and outlier residuals, default
        is 1.0. The loss function is evaluated as follows
        ``rho_(f**2) = C**2 * rho(f**2 / C**2)``, where ``C`` is `f_scale`,
        and ``rho`` is determined by the `loss` parameter. This parameter has
        no effect with ``loss='linear'``, but for other `loss` values it is
        of crucial importance.
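
        Combining this with the cost definition above, the minimized cost
        becomes (with ``C = f_scale``)::

            0.5 * sum(C**2 * rho(f_i(x)**2 / C**2), i = 0, ..., m - 1)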
    max_nfev : None or int, optional
        Maximum number of function evaluations before the termination.
        If None (default), the value is chosen automatically:

        * For 'trf' and 'dogbox' : 100 * n.
        * For 'lm' : 100 * n if `jac` is callable and 100 * n * (n + 1)
          otherwise (because 'lm' counts function calls in Jacobian
          estimation).

    diff_step : None or array_like, optional
        Determines the relative step size for the finite difference
        approximation of the Jacobian. The actual step is computed as
        ``x * diff_step``. If None (default), then `diff_step` is taken to be
        a conventional "optimal" power of machine epsilon for the finite
        difference scheme used [NR]_.
    tr_solver : {None, 'exact', 'lsmr'}, optional
        Method for solving trust-region subproblems, relevant only for 'trf'
        and 'dogbox' methods.

        * 'exact' is suitable for not very large problems with dense
          Jacobian matrices. The computational complexity per iteration is
          comparable to a singular value decomposition of the Jacobian
          matrix.
        * 'lsmr' is suitable for problems with sparse and large Jacobian
          matrices. It uses the iterative procedure
          `scipy.sparse.linalg.lsmr` for finding a solution of a linear
          least-squares problem and only requires matrix-vector product
          evaluations.

        If None (default), the solver is chosen based on the type of Jacobian
        returned on the first iteration.
    tr_options : dict, optional
        Keyword options passed to the trust-region solver.

        * ``tr_solver='exact'``: `tr_options` are ignored.
        * ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`.
          Additionally, ``method='trf'`` supports the 'regularize' option
          (bool, default is True), which adds a regularization term to the
          normal equation, improving convergence if the Jacobian is
          rank-deficient [Byrd]_ (eq. 3.4).
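
        For example, with ``method='trf'`` and ``tr_solver='lsmr'`` that
        regularization term can be switched off by passing::

            tr_options={'regularize': False}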

    jac_sparsity : {None, array_like, sparse matrix}, optional
        Defines the sparsity structure of the Jacobian matrix for finite
        difference estimation, its shape must be (m, n). If the Jacobian has
        only a few non-zero elements in *each* row, providing the sparsity
        structure will greatly speed up the computations [Curtis]_. A zero
        entry means that a corresponding element in the Jacobian is identically
        zero. If provided, forces the use of the 'lsmr' trust-region solver.
        If None (default), then dense differencing will be used. Has no effect
        for the 'lm' method.
    verbose : {0, 1, 2}, optional
        Level of the algorithm's verbosity:

        * 0 (default) : work silently.
        * 1 : display a termination report.
        * 2 : display progress during iterations (not supported by the 'lm'
          method).

    args, kwargs : tuple and dict, optional
        Additional arguments passed to `fun` and `jac`. Both empty by default.
        The calling signature is ``fun(x, *args, **kwargs)`` and the same for
        `jac`.

    Returns
    -------
    result : OptimizeResult
        `OptimizeResult` with the following fields defined:

        x : ndarray, shape (n,)
            Solution found.
        cost : float
            Value of the cost function at the solution.
        fun : ndarray, shape (m,)
            Vector of residuals at the solution.
        jac : ndarray, sparse matrix or LinearOperator, shape (m, n)
            Modified Jacobian matrix at the solution, in the sense that J^T J
            is a Gauss-Newton approximation of the Hessian of the cost function.
            The type is the same as the one used by the algorithm.
        grad : ndarray, shape (n,)
            Gradient of the cost function at the solution.
        optimality : float
            First-order optimality measure. In unconstrained problems, it is
            always the uniform norm of the gradient. In constrained problems,
            it is the quantity which was compared with `gtol` during iterations.
        active_mask : ndarray of int, shape (n,)
            Each component shows whether a corresponding constraint is active
            (that is, whether a variable is at the bound):

            * 0 : a constraint is not active.
            * -1 : a lower bound is active.
            * 1 : an upper bound is active.

            Might be somewhat arbitrary for the 'trf' method as it generates a
            sequence of strictly feasible iterates and `active_mask` is
            determined within a tolerance threshold.
        nfev : int
            Number of function evaluations done. Methods 'trf' and 'dogbox' do
            not count function calls for numerical Jacobian approximation, as
            opposed to the 'lm' method.
        njev : int or None
            Number of Jacobian evaluations done. If numerical Jacobian
            approximation is used in the 'lm' method, it is set to None.
        status : int
            The reason for algorithm termination:

            * -1 : improper input parameters status returned from MINPACK.
            * 0 : the maximum number of function evaluations is exceeded.
            * 1 : `gtol` termination condition is satisfied.
            * 2 : `ftol` termination condition is satisfied.
            * 3 : `xtol` termination condition is satisfied.
            * 4 : both `ftol` and `xtol` termination conditions are satisfied.

        message : str
            Verbal description of the termination reason.
        success : bool
            True if one of the convergence criteria is satisfied (`status` > 0).

    See Also
    --------
    leastsq : A legacy wrapper for the MINPACK implementation of the
        Levenberg-Marquardt algorithm.
    curve_fit : Least-squares minimization applied to a curve-fitting problem.

    Notes
    -----
    Method 'lm' (Levenberg-Marquardt) calls a wrapper over least-squares
    algorithms implemented in MINPACK (lmder, lmdif). It runs the
    Levenberg-Marquardt algorithm formulated as a trust-region type algorithm.
    The implementation is based on the paper [JJMore]_; it is very robust and
    efficient with a lot of smart tricks. It should be your first choice
    for unconstrained problems. Note that it doesn't support bounds. Also,
    it doesn't work when m < n.

    Method 'trf' (Trust Region Reflective) is motivated by the process of
    solving a system of equations, which constitute the first-order optimality
    condition for a bound-constrained minimization problem as formulated in
    [STIR]_. The algorithm iteratively solves trust-region subproblems
    augmented by a special diagonal quadratic term and with trust-region shape
    determined by the distance from the bounds and the direction of the
    gradient. These enhancements help to avoid making steps directly into
    bounds and efficiently explore the whole space of variables. To further
    improve convergence, the algorithm considers search directions reflected
    from the bounds. To obey theoretical requirements, the algorithm keeps
    iterates strictly feasible. With dense Jacobians, trust-region subproblems
    are solved by an exact method very similar to the one described in
    [JJMore]_ (and implemented in MINPACK). The difference from the MINPACK
    implementation is that a singular value decomposition of the Jacobian
    matrix is done once per iteration, instead of a QR decomposition and a
    series of Givens rotation eliminations. For large sparse Jacobians, a 2-D
    subspace approach of solving trust-region subproblems is used [STIR]_,
    [Byrd]_. The subspace is spanned by a scaled gradient and an approximate
    Gauss-Newton solution delivered by `scipy.sparse.linalg.lsmr`. When no
    constraints are imposed, the algorithm is very similar to MINPACK and has
    generally comparable performance. The algorithm works quite robustly in
    unbounded and bounded problems, and is thus chosen as the default
    algorithm.

    Method 'dogbox' operates in a trust-region framework, but considers
    rectangular trust regions as opposed to conventional ellipsoids [Voglis]_.
    The intersection of a current trust region and initial bounds is again
    rectangular, so on each iteration a quadratic minimization problem subject
    to bound constraints is solved approximately by Powell's dogleg method
    [NumOpt]_. The required Gauss-Newton step can be computed exactly for
    dense Jacobians or approximately by `scipy.sparse.linalg.lsmr` for large
    sparse Jacobians. The algorithm is likely to exhibit slow convergence when
    the rank of the Jacobian is less than the number of variables. The
    algorithm often outperforms 'trf' in bounded problems with a small number
    of variables.

    Robust loss functions are implemented as described in [BA]_. The idea
    is to modify a residual vector and a Jacobian matrix on each iteration
    such that the computed gradient and Gauss-Newton Hessian approximation
    match the true gradient and Hessian approximation of the cost function.
    Then the algorithm proceeds in a normal way, i.e., robust loss functions
    are implemented as a simple wrapper over standard least-squares algorithms.

    .. versionadded:: 0.17.0

    References
    ----------
    .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
              and Conjugate Gradient Method for Large-Scale Bound-Constrained
              Minimization Problems," SIAM Journal on Scientific Computing,
              Vol. 21, Number 1, pp 1-23, 1999.
    .. [NR] William H. Press et. al., "Numerical Recipes. The Art of Scientific
            Computing. 3rd edition", Sec. 5.7.
    .. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate
              solution of the trust region problem by minimization over
              two-dimensional subspaces", Math. Programming, 40, pp. 247-263,
              1988.
    .. [Curtis] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
                sparse Jacobian matrices", Journal of the Institute of
                Mathematics and its Applications, 13, pp. 117-120, 1974.
    .. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation
                and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
                Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    .. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region
                Dogleg Approach for Unconstrained and Bound Constrained
                Nonlinear Optimization", WSEAS International Conference on
                Applied Mathematics, Corfu, Greece, 2004.
    .. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization,
                2nd edition", Chapter 4.
    .. [BA] B. Triggs et. al., "Bundle Adjustment - A Modern Synthesis",
            Proceedings of the International Workshop on Vision Algorithms:
            Theory and Practice, pp. 298-372, 1999.

    Examples
    --------
    In this example we find a minimum of the Rosenbrock function without bounds
    on independent variables.

    >>> import numpy as np
    >>> def fun_rosenbrock(x):
    ...     return np.array([10 * (x[1] - x[0]**2), (1 - x[0])])

    Notice that we only provide the vector of the residuals. The algorithm
    constructs the cost function as a sum of squares of the residuals, which
    gives the Rosenbrock function. The exact minimum is at ``x = [1.0, 1.0]``.

    >>> from scipy.optimize import least_squares
    >>> x0_rosenbrock = np.array([2, 2])
    >>> res_1 = least_squares(fun_rosenbrock, x0_rosenbrock)
    >>> res_1.x
    array([ 1.,  1.])
    >>> res_1.cost
    9.8669242910846867e-30
    >>> res_1.optimality
    8.8928864934219529e-14

    We now constrain the variables, in such a way that the previous solution
    becomes infeasible. Specifically, we require that ``x[1] >= 1.5``, and
    ``x[0]`` is left unconstrained. To this end, we specify the `bounds`
    parameter to `least_squares` in the form
    ``bounds=([-np.inf, 1.5], np.inf)``.

    We also provide the analytic Jacobian:

    >>> def jac_rosenbrock(x):
    ...     return np.array([
    ...         [-20 * x[0], 10],
    ...         [-1, 0]])

    Putting this all together, we see that the new solution lies on the bound:

    >>> res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock,
    ...                       bounds=([-np.inf, 1.5], np.inf))
    >>> res_2.x
    array([ 1.22437075,  1.5       ])
    >>> res_2.cost
    0.025213093946805685
    >>> res_2.optimality
    1.5885401433157753e-07

    Now we solve a system of equations (i.e., the cost function should be zero
    at a minimum) for a Broyden tridiagonal vector-valued function of 100000
    variables:

    >>> def fun_broyden(x):
    ...     f = (3 - x) * x + 1
    ...     f[1:] -= x[:-1]
    ...     f[:-1] -= 2 * x[1:]
    ...     return f

    The corresponding Jacobian matrix is sparse. We tell the algorithm to
    estimate it by finite differences and provide the sparsity structure of
    the Jacobian to significantly speed up this process.

    >>> from scipy.sparse import lil_matrix
    >>> def sparsity_broyden(n):
    ...     sparsity = lil_matrix((n, n), dtype=int)
    ...     i = np.arange(n)
    ...     sparsity[i, i] = 1
    ...     i = np.arange(1, n)
    ...     sparsity[i, i - 1] = 1
    ...     i = np.arange(n - 1)
    ...     sparsity[i, i + 1] = 1
    ...     return sparsity
    ...
    >>> n = 100000
    >>> x0_broyden = -np.ones(n)
    ...
    >>> res_3 = least_squares(fun_broyden, x0_broyden,
    ...                       jac_sparsity=sparsity_broyden(n))
    >>> res_3.cost
    4.5687069299604613e-23
    >>> res_3.optimality
    1.1650454296851518e-11

    Let's also solve a curve fitting problem using a robust loss function to
    take care of outliers in the data. Define the model function as
    ``y = a + b * exp(c * t)``, where t is a predictor variable, y is an
    observation and a, b, c are parameters to estimate.

    First, define the function which generates the data with noise and
    outliers, define the model parameters, and generate data:

    >>> from numpy.random import default_rng
    >>> rng = default_rng()
    >>> def gen_data(t, a, b, c, noise=0., n_outliers=0, seed=None):
    ...     rng = default_rng(seed)
    ...
    ...     y = a + b * np.exp(t * c)
    ...
    ...     error = noise * rng.standard_normal(t.size)
    ...     outliers = rng.integers(0, t.size, n_outliers)
    ...     error[outliers] *= 10
    ...
    ...     return y + error
    ...
    >>> a = 0.5
    >>> b = 2.0
    >>> c = -1
    >>> t_min = 0
    >>> t_max = 10
    >>> n_points = 15
    ...
    >>> t_train = np.linspace(t_min, t_max, n_points)
    >>> y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

    Define the function for computing residuals and the initial estimate of
    the parameters.

    >>> def fun(x, t, y):
    ...     return x[0] + x[1] * np.exp(x[2] * t) - y
    ...
    >>> x0 = np.array([1.0, 1.0, 0.0])

    Compute a standard least-squares solution:

    >>> res_lsq = least_squares(fun, x0, args=(t_train, y_train))

    Now compute two solutions with two different robust loss functions. The
    parameter `f_scale` is set to 0.1, meaning that inlier residuals should
    not significantly exceed 0.1 (the noise level used).

    >>> res_soft_l1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1,
    ...                             args=(t_train, y_train))
    >>> res_log = least_squares(fun, x0, loss='cauchy', f_scale=0.1,
    ...                         args=(t_train, y_train))

    And, finally, plot all the curves. We see that by selecting an appropriate
    `loss` we can get estimates close to optimal even in the presence of
    strong outliers. But keep in mind that generally it is recommended to try
    'soft_l1' or 'huber' losses first (if at all necessary) as the other two
    options may cause difficulties in the optimization process.

    >>> t_test = np.linspace(t_min, t_max, n_points * 10)
    >>> y_true = gen_data(t_test, a, b, c)
    >>> y_lsq = gen_data(t_test, *res_lsq.x)
    >>> y_soft_l1 = gen_data(t_test, *res_soft_l1.x)
    >>> y_log = gen_data(t_test, *res_log.x)
    ...
    >>> import matplotlib.pyplot as plt
    >>> plt.plot(t_train, y_train, 'o')
    >>> plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
    >>> plt.plot(t_test, y_lsq, label='linear loss')
    >>> plt.plot(t_test, y_soft_l1, label='soft_l1 loss')
    >>> plt.plot(t_test, y_log, label='cauchy loss')
    >>> plt.xlabel("t")
    >>> plt.ylabel("y")
    >>> plt.legend()
    >>> plt.show()

    In the next example, we show how complex-valued residual functions of
    complex variables can be optimized with ``least_squares()``. Consider the
    following function:

    >>> def f(z):
    ...     return z - (0.5 + 0.5j)

    We wrap it into a function of real variables that returns real residuals
    by simply handling the real and imaginary parts as independent variables:

    >>> def f_wrap(x):
    ...     fx = f(x[0] + 1j*x[1])
    ...     return np.array([fx.real, fx.imag])

    Thus, instead of the original m-D complex function of n complex
    variables we optimize a 2m-D real function of 2n real variables:

    >>> from scipy.optimize import least_squares
    >>> res_wrapped = least_squares(f_wrap, (0.1, 0.1), bounds=([0, 0], [1, 1]))
    >>> z = res_wrapped.x[0] + res_wrapped.x[1]*1j
    >>> z
    (0.49999999999925893+0.49999999999925893j)
    """
    if method not in ['trf', 'dogbox', 'lm']:
        raise ValueError("`method` must be 'trf', 'dogbox' or 'lm'.")

    if jac not in ['2-point', '3-point', 'cs'] and not callable(jac):
        raise ValueError("`jac` must be '2-point', '3-point', 'cs' or "
                         "callable.")

    if tr_solver not in [None, 'exact', 'lsmr']:
        raise ValueError("`tr_solver` must be None, 'exact' or 'lsmr'.")

    if loss not in IMPLEMENTED_LOSSES and not callable(loss):
        raise ValueError("`loss` must be one of {0} or a callable."
                         .format(IMPLEMENTED_LOSSES.keys()))

    if method == 'lm' and loss != 'linear':
        raise ValueError("method='lm' supports only 'linear' loss function.")

    if verbose not in [0, 1, 2]:
        raise ValueError("`verbose` must be in [0, 1, 2].")

    if max_nfev is not None and max_nfev <= 0:
        raise ValueError("`max_nfev` must be None or positive integer.")

    if np.iscomplexobj(x0):
        raise ValueError("`x0` must be real.")

    x0 = np.atleast_1d(x0).astype(float)

    if x0.ndim > 1:
        raise ValueError("`x0` must have at most 1 dimension.")

    if isinstance(bounds, Bounds):
        lb, ub = bounds.lb, bounds.ub
        bounds = (lb, ub)
    else:
        if len(bounds) == 2:
            lb, ub = prepare_bounds(bounds, x0.shape[0])
        else:
            raise ValueError("`bounds` must contain 2 elements.")

    if method == 'lm' and not np.all((lb == -np.inf) & (ub == np.inf)):
        raise ValueError("Method 'lm' doesn't support bounds.")

    if lb.shape != x0.shape or ub.shape != x0.shape:
        raise ValueError("Inconsistent shapes between bounds and `x0`.")

    if np.any(lb >= ub):
        raise ValueError("Each lower bound must be strictly less than each "
                         "upper bound.")

    if not in_bounds(x0, lb, ub):
        raise ValueError("`x0` is infeasible.")

    x_scale = check_x_scale(x_scale, x0)

    ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method)

    def fun_wrapped(x):
        return np.atleast_1d(fun(x, *args, **kwargs))

    if method == 'trf':
        x0 = make_strictly_feasible(x0, lb, ub)

    f0 = fun_wrapped(x0)

    if f0.ndim != 1:
        raise ValueError("`fun` must return at most 1-d array_like. "
                         "f0.shape: {0}".format(f0.shape))

    if not np.all(np.isfinite(f0)):
        raise ValueError("Residuals are not finite in the initial point.")

    n = x0.size
    m = f0.size

    if method == 'lm' and m < n:
        raise ValueError("Method 'lm' doesn't work when the number of "
                         "residuals is less than the number of variables.")

    loss_function = construct_loss_function(m, loss, f_scale)
    if callable(loss):
        rho = loss_function(f0)
        if rho.shape != (3, m):
            raise ValueError("The return value of `loss` callable has wrong "
                             "shape.")
        initial_cost = 0.5 * np.sum(rho[0])
    elif loss_function is not None:
        initial_cost = loss_function(f0, cost_only=True)
    else:
        initial_cost = 0.5 * np.dot(f0, f0)

    if callable(jac):
        J0 = jac(x0, *args, **kwargs)

        if issparse(J0):
            J0 = J0.tocsr()

            def jac_wrapped(x, _=None):
                return jac(x, *args, **kwargs).tocsr()

        elif isinstance(J0, LinearOperator):
            def jac_wrapped(x, _=None):
                return jac(x, *args, **kwargs)

        else:
            J0 = np.atleast_2d(J0)

            def jac_wrapped(x, _=None):
                return np.atleast_2d(jac(x, *args, **kwargs))

    else:  # Estimate Jacobian by finite differences.
        if method == 'lm':
            if jac_sparsity is not None:
                raise ValueError("method='lm' does not support "
                                 "`jac_sparsity`.")

            if jac != '2-point':
                warn("jac='{0}' works equivalently to '2-point' "
                     "for method='lm'.".format(jac))

            J0 = jac_wrapped = None
        else:
            if jac_sparsity is not None and tr_solver == 'exact':
                raise ValueError("tr_solver='exact' is incompatible "
                                 "with `jac_sparsity`.")

            jac_sparsity = check_jac_sparsity(jac_sparsity, m, n)

            def jac_wrapped(x, f):
                J = approx_derivative(fun, x, rel_step=diff_step, method=jac,
                                      f0=f, bounds=bounds, args=args,
                                      kwargs=kwargs, sparsity=jac_sparsity)
                if J.ndim != 2:  # J is guaranteed not sparse.
                    J = np.atleast_2d(J)

                return J

            J0 = jac_wrapped(x0, f0)

    if J0 is not None:
        if J0.shape != (m, n):
            raise ValueError(
                "The return value of `jac` has wrong shape: expected {0}, "
                "actual {1}.".format((m, n), J0.shape))

        if not isinstance(J0, np.ndarray):
            if method == 'lm':
                raise ValueError("method='lm' works only with dense "
                                 "Jacobian matrices.")

            if tr_solver == 'exact':
                raise ValueError(
                    "tr_solver='exact' works only with dense "
                    "Jacobian matrices.")

        jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
        if isinstance(J0, LinearOperator) and jac_scale:
            raise ValueError("x_scale='jac' can't be used when `jac` "
                             "returns LinearOperator.")

        if tr_solver is None:
            if isinstance(J0, np.ndarray):
                tr_solver = 'exact'
            else:
                tr_solver = 'lsmr'

    if method == 'lm':
        result = call_minpack(fun_wrapped, x0, jac_wrapped, ftol, xtol, gtol,
                              max_nfev, x_scale, diff_step)

    elif method == 'trf':
        result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol,
                     gtol, max_nfev, x_scale, loss_function, tr_solver,
                     tr_options.copy(), verbose)

    elif method == 'dogbox':
        if tr_solver == 'lsmr' and 'regularize' in tr_options:
            warn("The keyword 'regularize' in `tr_options` is not relevant "
                 "for 'dogbox' method.")
            tr_options = tr_options.copy()
            del tr_options['regularize']

        result = dogbox(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol,
                        xtol, gtol, max_nfev, x_scale, loss_function,
                        tr_solver, tr_options, verbose)

    result.message = TERMINATION_MESSAGES[result.status]
    result.success = result.status > 0

    if verbose >= 1:
        print(result.message)
        print("Function evaluations {0}, initial cost {1:.4e}, final cost "
              "{2:.4e}, first-order optimality {3:.2e}."
              .format(result.nfev, initial_cost, result.cost,
                      result.optimality))

    return result