1"""Generic interface for least-squares minimization.""" 

2from warnings import warn 

3 

4import numpy as np 

5from numpy.linalg import norm 

6 

7from scipy.sparse import issparse 

8from scipy.sparse.linalg import LinearOperator 

9from scipy.optimize import _minpack, OptimizeResult 

10from scipy.optimize._numdiff import approx_derivative, group_columns 

11from scipy.optimize._minimize import Bounds 

12 

13from .trf import trf 

14from .dogbox import dogbox 

15from .common import EPS, in_bounds, make_strictly_feasible 

16 

17 

18TERMINATION_MESSAGES = { 

19 -1: "Improper input parameters status returned from `leastsq`", 

20 0: "The maximum number of function evaluations is exceeded.", 

21 1: "`gtol` termination condition is satisfied.", 

22 2: "`ftol` termination condition is satisfied.", 

23 3: "`xtol` termination condition is satisfied.", 

24 4: "Both `ftol` and `xtol` termination conditions are satisfied." 

25} 

26 

27 

28FROM_MINPACK_TO_COMMON = { 

29 0: -1, # Improper input parameters from MINPACK. 

30 1: 2, 

31 2: 3, 

32 3: 4, 

33 4: 1, 

34 5: 0 

35 # There are 6, 7, 8 for too small tolerance parameters, 

36 # but we guard against it by checking ftol, xtol, gtol beforehand. 

37} 


def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step):
    n = x0.size

    if diff_step is None:
        epsfcn = EPS
    else:
        epsfcn = diff_step**2
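    # MINPACK interprets `epsfcn` as the assumed relative error in `fun` and
    # takes forward-difference steps of roughly sqrt(epsfcn) * |x|, so
    # squaring `diff_step` reproduces the requested relative step size.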

    # Compute MINPACK's `diag`, which is inverse of our `x_scale` and
    # ``x_scale='jac'`` corresponds to ``diag=None``.
    if isinstance(x_scale, str) and x_scale == 'jac':
        diag = None
    else:
        diag = 1 / x_scale

    full_output = True
    col_deriv = False
    factor = 100.0
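    # `factor` sets MINPACK's initial step bound (the initial trust-region
    # radius is factor * ||diag * x0||); 100.0 is the value recommended in
    # the MINPACK documentation.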

    if jac is None:
        if max_nfev is None:
            # n squared to account for Jacobian evaluations.
            max_nfev = 100 * n * (n + 1)
        x, info, status = _minpack._lmdif(
            fun, x0, (), full_output, ftol, xtol, gtol,
            max_nfev, epsfcn, factor, diag)
    else:
        if max_nfev is None:
            max_nfev = 100 * n
        x, info, status = _minpack._lmder(
            fun, jac, x0, (), full_output, col_deriv,
            ftol, xtol, gtol, max_nfev, factor, diag)

    f = info['fvec']

    if callable(jac):
        J = jac(x)
    else:
        J = np.atleast_2d(approx_derivative(fun, x))
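    # MINPACK does not return the Jacobian, so `jac`, `grad` and `optimality`
    # reported below are recomputed at the returned solution.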

    cost = 0.5 * np.dot(f, f)
    g = J.T.dot(f)
    g_norm = norm(g, ord=np.inf)

    nfev = info['nfev']
    njev = info.get('njev', None)

    status = FROM_MINPACK_TO_COMMON[status]
    active_mask = np.zeros_like(x0, dtype=int)
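    # 'lm' does not support bounds, so no constraint can ever be active.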

    return OptimizeResult(
        x=x, cost=cost, fun=f, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev, status=status)


def prepare_bounds(bounds, n):
    lb, ub = [np.asarray(b, dtype=float) for b in bounds]
    if lb.ndim == 0:
        lb = np.resize(lb, n)

    if ub.ndim == 0:
        ub = np.resize(ub, n)

    return lb, ub


def check_tolerance(ftol, xtol, gtol, method):
    def check(tol, name):
        if tol is None:
            tol = 0
        elif tol < EPS:
            warn("Setting `{}` below the machine epsilon ({:.2e}) effectively "
                 "disables the corresponding termination condition."
                 .format(name, EPS))
        return tol

    ftol = check(ftol, "ftol")
    xtol = check(xtol, "xtol")
    gtol = check(gtol, "gtol")

    if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS):
        raise ValueError("All tolerances must be higher than machine epsilon "
                         "({:.2e}) for method 'lm'.".format(EPS))
    elif ftol < EPS and xtol < EPS and gtol < EPS:
        raise ValueError("At least one of the tolerances must be higher than "
                         "machine epsilon ({:.2e}).".format(EPS))

    return ftol, xtol, gtol


def check_x_scale(x_scale, x0):
    if isinstance(x_scale, str) and x_scale == 'jac':
        return x_scale

    try:
        x_scale = np.asarray(x_scale, dtype=float)
        valid = np.all(np.isfinite(x_scale)) and np.all(x_scale > 0)
    except (ValueError, TypeError):
        valid = False

    if not valid:
        raise ValueError("`x_scale` must be 'jac' or array_like with "
                         "positive numbers.")

    if x_scale.ndim == 0:
        x_scale = np.resize(x_scale, x0.shape)

    if x_scale.shape != x0.shape:
        raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")

    return x_scale


def check_jac_sparsity(jac_sparsity, m, n):
    if jac_sparsity is None:
        return None

    if not issparse(jac_sparsity):
        jac_sparsity = np.atleast_2d(jac_sparsity)

    if jac_sparsity.shape != (m, n):
        raise ValueError("`jac_sparsity` has wrong shape.")

    return jac_sparsity, group_columns(jac_sparsity)


# Loss functions.
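# Each loss fills a (3, m) array `rho` in place: row 0 holds rho(z), row 1
# holds rho'(z) and row 2 holds rho''(z), where z = (f / f_scale)**2 is the
# squared scaled residual (see `construct_loss_function` below).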


def huber(z, rho, cost_only):
    mask = z <= 1
    rho[0, mask] = z[mask]
    rho[0, ~mask] = 2 * z[~mask]**0.5 - 1
    if cost_only:
        return
    rho[1, mask] = 1
    rho[1, ~mask] = z[~mask]**-0.5
    rho[2, mask] = 0
    rho[2, ~mask] = -0.5 * z[~mask]**-1.5


def soft_l1(z, rho, cost_only):
    t = 1 + z
    rho[0] = 2 * (t**0.5 - 1)
    if cost_only:
        return
    rho[1] = t**-0.5
    rho[2] = -0.5 * t**-1.5


def cauchy(z, rho, cost_only):
    rho[0] = np.log1p(z)
    if cost_only:
        return
    t = 1 + z
    rho[1] = 1 / t
    rho[2] = -1 / t**2


def arctan(z, rho, cost_only):
    rho[0] = np.arctan(z)
    if cost_only:
        return
    t = 1 + z**2
    rho[1] = 1 / t
    rho[2] = -2 * z / t**2


IMPLEMENTED_LOSSES = dict(linear=None, huber=huber, soft_l1=soft_l1,
                          cauchy=cauchy, arctan=arctan)


def construct_loss_function(m, loss, f_scale):
    if loss == 'linear':
        return None

    if not callable(loss):
        loss = IMPLEMENTED_LOSSES[loss]
        rho = np.empty((3, m))
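        # `rho` is reused between calls; `loss_function` rescales it in place
        # so that the effective loss is rho_(f**2) = C**2 * rho(f**2 / C**2)
        # with C = f_scale: the value gains a factor C**2, the first
        # derivative is unchanged, and the second derivative gains 1 / C**2.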

        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            loss(z, rho, cost_only=cost_only)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho
    else:
        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            rho = loss(z)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho

    return loss_function

def least_squares(
        fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf',
        ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear',
        f_scale=1.0, diff_step=None, tr_solver=None, tr_options={},
        jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={}):
    """Solve a nonlinear least-squares problem with bounds on the variables.

    Given the residuals f(x) (an m-D real function of n real
    variables) and the loss function rho(s) (a scalar function), `least_squares`
    finds a local minimum of the cost function F(x)::

        minimize F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1)
        subject to lb <= x <= ub

    The purpose of the loss function rho(s) is to reduce the influence of
    outliers on the solution.

    Parameters
    ----------
    fun : callable
        Function which computes the vector of residuals, with the signature
        ``fun(x, *args, **kwargs)``, i.e., the minimization proceeds with
        respect to its first argument. The argument ``x`` passed to this
        function is an ndarray of shape (n,) (never a scalar, even for n=1).
        It must allocate and return a 1-D array_like of shape (m,) or a scalar.
        If the argument ``x`` is complex or the function ``fun`` returns
        complex residuals, it must be wrapped in a real function of real
        arguments, as shown at the end of the Examples section.
    x0 : array_like with shape (n,) or float
        Initial guess on independent variables. If float, it will be treated
        as a 1-D array with one element.
    jac : {'2-point', '3-point', 'cs', callable}, optional
        Method of computing the Jacobian matrix (an m-by-n matrix, where
        element (i, j) is the partial derivative of f[i] with respect to
        x[j]). The keywords select a finite difference scheme for numerical
        estimation. The scheme '3-point' is more accurate, but requires
        twice as many operations as '2-point' (default). The scheme 'cs'
        uses complex steps, and while potentially the most accurate, it is
        applicable only when `fun` correctly handles complex inputs and
        can be analytically continued to the complex plane. Method 'lm'
        always uses the '2-point' scheme. If callable, it is used as
        ``jac(x, *args, **kwargs)`` and should return a good approximation
        (or the exact value) for the Jacobian as an array_like (np.atleast_2d
        is applied), a sparse matrix (csr_matrix preferred for performance) or
        a `scipy.sparse.linalg.LinearOperator`.
    bounds : 2-tuple of array_like or `Bounds`, optional
        There are two ways to specify bounds:

            1. Instance of `Bounds` class
            2. Lower and upper bounds on independent variables. Defaults to no
               bounds. Each array must match the size of `x0` or be a scalar,
               in the latter case a bound will be the same for all variables.
               Use ``np.inf`` with an appropriate sign to disable bounds on all
               or some variables.
    method : {'trf', 'dogbox', 'lm'}, optional
        Algorithm to perform minimization.

            * 'trf' : Trust Region Reflective algorithm, particularly suitable
              for large sparse problems with bounds. Generally robust method.
            * 'dogbox' : dogleg algorithm with rectangular trust regions,
              typical use case is small problems with bounds. Not recommended
              for problems with rank-deficient Jacobian.
            * 'lm' : Levenberg-Marquardt algorithm as implemented in MINPACK.
              Doesn't handle bounds and sparse Jacobians. Usually the most
              efficient method for small unconstrained problems.

        Default is 'trf'. See Notes for more information.
    ftol : float or None, optional
        Tolerance for termination by the change of the cost function. Default
        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
        and there was an adequate agreement between a local quadratic model and
        the true model in the last step.

        If None and 'method' is not 'lm', the termination by this condition is
        disabled. If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    xtol : float or None, optional
        Tolerance for termination by the change of the independent variables.
        Default is 1e-8. The exact condition depends on the `method` used:

            * For 'trf' and 'dogbox' : ``norm(dx) < xtol * (xtol + norm(x))``.
            * For 'lm' : ``Delta < xtol * norm(xs)``, where ``Delta`` is
              a trust-region radius and ``xs`` is the value of ``x``
              scaled according to `x_scale` parameter (see below).

        If None and 'method' is not 'lm', the termination by this condition is
        disabled. If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    gtol : float or None, optional
        Tolerance for termination by the norm of the gradient. Default is 1e-8.
        The exact condition depends on the `method` used:

            * For 'trf' : ``norm(g_scaled, ord=np.inf) < gtol``, where
              ``g_scaled`` is the value of the gradient scaled to account for
              the presence of the bounds [STIR]_.
            * For 'dogbox' : ``norm(g_free, ord=np.inf) < gtol``, where
              ``g_free`` is the gradient with respect to the variables which
              are not in the optimal state on the boundary.
            * For 'lm' : the maximum absolute value of the cosine of angles
              between columns of the Jacobian and the residual vector is less
              than `gtol`, or the residual vector is zero.

        If None and 'method' is not 'lm', the termination by this condition is
        disabled. If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    x_scale : array_like or 'jac', optional
        Characteristic scale of each variable. Setting `x_scale` is equivalent
        to reformulating the problem in scaled variables ``xs = x / x_scale``.
        An alternative view is that the size of a trust region along the jth
        dimension is proportional to ``x_scale[j]``. Improved convergence may
        be achieved by setting `x_scale` such that a step of a given size
        along any of the scaled variables has a similar effect on the cost
        function. If set to 'jac', the scale is iteratively updated using the
        inverse norms of the columns of the Jacobian matrix (as described in
        [JJMore]_).
    loss : str or callable, optional
        Determines the loss function. The following keyword values are allowed:

            * 'linear' (default) : ``rho(z) = z``. Gives a standard
              least-squares problem.
            * 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
              approximation of l1 (absolute value) loss. Usually a good
              choice for robust least squares.
            * 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
              similarly to 'soft_l1'.
            * 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens outliers'
              influence, but may cause difficulties in the optimization
              process.
            * 'arctan' : ``rho(z) = arctan(z)``. Limits a maximum loss on
              a single residual, has properties similar to 'cauchy'.

        If callable, it must take a 1-D ndarray ``z=f**2`` and return an
        array_like with shape (3, m) where row 0 contains function values,
        row 1 contains first derivatives and row 2 contains second
        derivatives. Method 'lm' supports only 'linear' loss.
    f_scale : float, optional
        Value of soft margin between inlier and outlier residuals, default
        is 1.0. The loss function is evaluated as follows
        ``rho_(f**2) = C**2 * rho(f**2 / C**2)``, where ``C`` is `f_scale`,
        and ``rho`` is determined by the `loss` parameter. This parameter has
        no effect with ``loss='linear'``, but for other `loss` values it is
        of crucial importance.
    max_nfev : None or int, optional
        Maximum number of function evaluations before the termination.
        If None (default), the value is chosen automatically:

            * For 'trf' and 'dogbox' : 100 * n.
            * For 'lm' : 100 * n if `jac` is callable and 100 * n * (n + 1)
              otherwise (because 'lm' counts function calls in Jacobian
              estimation).

    diff_step : None or array_like, optional
        Determines the relative step size for the finite difference
        approximation of the Jacobian. The actual step is computed as
        ``x * diff_step``. If None (default), then `diff_step` is taken to be
        a conventional "optimal" power of machine epsilon for the finite
        difference scheme used [NR]_.
    tr_solver : {None, 'exact', 'lsmr'}, optional
        Method for solving trust-region subproblems, relevant only for 'trf'
        and 'dogbox' methods.

            * 'exact' is suitable for not very large problems with dense
              Jacobian matrices. The computational complexity per iteration is
              comparable to a singular value decomposition of the Jacobian
              matrix.
            * 'lsmr' is suitable for problems with sparse and large Jacobian
              matrices. It uses the iterative procedure
              `scipy.sparse.linalg.lsmr` for finding a solution of a linear
              least-squares problem and only requires matrix-vector product
              evaluations.

        If None (default), the solver is chosen based on the type of Jacobian
        returned on the first iteration.
    tr_options : dict, optional
        Keyword options passed to trust-region solver.

            * ``tr_solver='exact'``: `tr_options` are ignored.
            * ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`.
              Additionally, ``method='trf'`` supports 'regularize' option
              (bool, default is True), which adds a regularization term to the
              normal equation, which improves convergence if the Jacobian is
              rank-deficient [Byrd]_ (eq. 3.4).

    jac_sparsity : {None, array_like, sparse matrix}, optional
        Defines the sparsity structure of the Jacobian matrix for finite
        difference estimation, its shape must be (m, n). If the Jacobian has
        only few non-zero elements in *each* row, providing the sparsity
        structure will greatly speed up the computations [Curtis]_. A zero
        entry means that a corresponding element in the Jacobian is identically
        zero. If provided, forces the use of 'lsmr' trust-region solver.
        If None (default), then dense differencing will be used. Has no effect
        for 'lm' method.
    verbose : {0, 1, 2}, optional
        Level of algorithm's verbosity:

            * 0 (default) : work silently.
            * 1 : display a termination report.
            * 2 : display progress during iterations (not supported by 'lm'
              method).

    args, kwargs : tuple and dict, optional
        Additional arguments passed to `fun` and `jac`. Both empty by default.
        The calling signature is ``fun(x, *args, **kwargs)`` and the same for
        `jac`.

    Returns
    -------
    result : OptimizeResult
        `OptimizeResult` with the following fields defined:

        x : ndarray, shape (n,)
            Solution found.
        cost : float
            Value of the cost function at the solution.
        fun : ndarray, shape (m,)
            Vector of residuals at the solution.
        jac : ndarray, sparse matrix or LinearOperator, shape (m, n)
            Modified Jacobian matrix at the solution, in the sense that J^T J
            is a Gauss-Newton approximation of the Hessian of the cost function.
            The type is the same as the one used by the algorithm.
        grad : ndarray, shape (n,)
            Gradient of the cost function at the solution.
        optimality : float
            First-order optimality measure. In unconstrained problems, it is
            always the uniform norm of the gradient. In constrained problems,
            it is the quantity which was compared with `gtol` during iterations.
        active_mask : ndarray of int, shape (n,)
            Each component shows whether a corresponding constraint is active
            (that is, whether a variable is at the bound):

                * 0 : a constraint is not active.
                * -1 : a lower bound is active.
                * 1 : an upper bound is active.

            Might be somewhat arbitrary for 'trf' method as it generates a
            sequence of strictly feasible iterates and `active_mask` is
            determined within a tolerance threshold.
        nfev : int
            Number of function evaluations done. Methods 'trf' and 'dogbox' do
            not count function calls for numerical Jacobian approximation, as
            opposed to 'lm' method.
        njev : int or None
            Number of Jacobian evaluations done. If numerical Jacobian
            approximation is used in 'lm' method, it is set to None.
        status : int
            The reason for algorithm termination:

                * -1 : improper input parameters status returned from MINPACK.
                * 0 : the maximum number of function evaluations is exceeded.
                * 1 : `gtol` termination condition is satisfied.
                * 2 : `ftol` termination condition is satisfied.
                * 3 : `xtol` termination condition is satisfied.
                * 4 : Both `ftol` and `xtol` termination conditions are satisfied.

        message : str
            Verbal description of the termination reason.
        success : bool
            True if one of the convergence criteria is satisfied (`status` > 0).

    See Also
    --------
    leastsq : A legacy wrapper for the MINPACK implementation of the
        Levenberg-Marquardt algorithm.
    curve_fit : Least-squares minimization applied to a curve-fitting problem.

    Notes
    -----
    Method 'lm' (Levenberg-Marquardt) calls a wrapper over least-squares
    algorithms implemented in MINPACK (lmder, lmdif). It runs the
    Levenberg-Marquardt algorithm formulated as a trust-region type algorithm.
    The implementation is based on paper [JJMore]_; it is very robust and
    efficient with a lot of smart tricks. It should be your first choice
    for unconstrained problems. Note that it doesn't support bounds. Also,
    it doesn't work when m < n.

    Method 'trf' (Trust Region Reflective) is motivated by the process of
    solving a system of equations, which constitutes the first-order optimality
    condition for a bound-constrained minimization problem as formulated in
    [STIR]_. The algorithm iteratively solves trust-region subproblems
    augmented by a special diagonal quadratic term and with trust-region shape
    determined by the distance from the bounds and the direction of the
    gradient. These enhancements help to avoid making steps directly into bounds
    and efficiently explore the whole space of variables. To further improve
    convergence, the algorithm considers search directions reflected from the
    bounds. To obey theoretical requirements, the algorithm keeps iterates
    strictly feasible. With dense Jacobians trust-region subproblems are
    solved by an exact method very similar to the one described in [JJMore]_
    (and implemented in MINPACK). The difference from the MINPACK
    implementation is that a singular value decomposition of a Jacobian
    matrix is done once per iteration, instead of a QR decomposition and series
    of Givens rotation eliminations. For large sparse Jacobians a 2-D subspace
    approach of solving trust-region subproblems is used [STIR]_, [Byrd]_.
    The subspace is spanned by a scaled gradient and an approximate
    Gauss-Newton solution delivered by `scipy.sparse.linalg.lsmr`. When no
    constraints are imposed the algorithm is very similar to MINPACK and has
    generally comparable performance. The algorithm works quite robustly in
    unbounded and bounded problems, thus it is chosen as the default algorithm.

    Method 'dogbox' operates in a trust-region framework, but considers
    rectangular trust regions as opposed to conventional ellipsoids [Voglis]_.
    The intersection of a current trust region and initial bounds is again
    rectangular, so on each iteration a quadratic minimization problem subject
    to bound constraints is solved approximately by Powell's dogleg method
    [NumOpt]_. The required Gauss-Newton step can be computed exactly for
    dense Jacobians or approximately by `scipy.sparse.linalg.lsmr` for large
    sparse Jacobians. The algorithm is likely to exhibit slow convergence when
    the rank of the Jacobian is less than the number of variables. The algorithm
    often outperforms 'trf' in bounded problems with a small number of
    variables.

    Robust loss functions are implemented as described in [BA]_. The idea
    is to modify a residual vector and a Jacobian matrix on each iteration
    such that the computed gradient and Gauss-Newton Hessian approximation match
    the true gradient and Hessian approximation of the cost function. Then
    the algorithm proceeds in a normal way, i.e., robust loss functions are
    implemented as a simple wrapper over standard least-squares algorithms.

    .. versionadded:: 0.17.0

    References
    ----------
    .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
              and Conjugate Gradient Method for Large-Scale Bound-Constrained
              Minimization Problems," SIAM Journal on Scientific Computing,
              Vol. 21, Number 1, pp. 1-23, 1999.
    .. [NR] William H. Press et al., "Numerical Recipes. The Art of Scientific
            Computing. 3rd edition", Sec. 5.7.
    .. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate
              solution of the trust region problem by minimization over
              two-dimensional subspaces", Math. Programming, 40, pp. 247-263,
              1988.
    .. [Curtis] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
                sparse Jacobian matrices", Journal of the Institute of
                Mathematics and its Applications, 13, pp. 117-120, 1974.
    .. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation
                and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
                Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    .. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region
                Dogleg Approach for Unconstrained and Bound Constrained
                Nonlinear Optimization", WSEAS International Conference on
                Applied Mathematics, Corfu, Greece, 2004.
    .. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization,
                2nd edition", Chapter 4.
    .. [BA] B. Triggs et al., "Bundle Adjustment - A Modern Synthesis",
            Proceedings of the International Workshop on Vision Algorithms:
            Theory and Practice, pp. 298-372, 1999.

    Examples
    --------
    In this example we find a minimum of the Rosenbrock function without bounds
    on independent variables.

    >>> import numpy as np
    >>> def fun_rosenbrock(x):
    ...     return np.array([10 * (x[1] - x[0]**2), (1 - x[0])])

    Notice that we only provide the vector of the residuals. The algorithm
    constructs the cost function as a sum of squares of the residuals, which
    gives the Rosenbrock function. The exact minimum is at ``x = [1.0, 1.0]``.

    >>> from scipy.optimize import least_squares
    >>> x0_rosenbrock = np.array([2, 2])
    >>> res_1 = least_squares(fun_rosenbrock, x0_rosenbrock)
    >>> res_1.x
    array([ 1., 1.])
    >>> res_1.cost
    9.8669242910846867e-30
    >>> res_1.optimality
    8.8928864934219529e-14

    We now constrain the variables, in such a way that the previous solution
    becomes infeasible. Specifically, we require that ``x[1] >= 1.5``, and
    ``x[0]`` is left unconstrained. To this end, we specify the `bounds`
    parameter to `least_squares` in the form ``bounds=([-np.inf, 1.5], np.inf)``.

    We also provide the analytic Jacobian:

    >>> def jac_rosenbrock(x):
    ...     return np.array([
    ...         [-20 * x[0], 10],
    ...         [-1, 0]])

    Putting this all together, we see that the new solution lies on the bound:

    >>> res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock,
    ...                       bounds=([-np.inf, 1.5], np.inf))
    >>> res_2.x
    array([ 1.22437075, 1.5 ])
    >>> res_2.cost
    0.025213093946805685
    >>> res_2.optimality
    1.5885401433157753e-07

    Now we solve a system of equations (i.e., the cost function should be zero
    at a minimum) for a Broyden tridiagonal vector-valued function of 100000
    variables:

    >>> def fun_broyden(x):
    ...     f = (3 - x) * x + 1
    ...     f[1:] -= x[:-1]
    ...     f[:-1] -= 2 * x[1:]
    ...     return f

    The corresponding Jacobian matrix is sparse. We tell the algorithm to
    estimate it by finite differences and provide the sparsity structure of
    the Jacobian to significantly speed up this process.

    >>> from scipy.sparse import lil_matrix
    >>> def sparsity_broyden(n):
    ...     sparsity = lil_matrix((n, n), dtype=int)
    ...     i = np.arange(n)
    ...     sparsity[i, i] = 1
    ...     i = np.arange(1, n)
    ...     sparsity[i, i - 1] = 1
    ...     i = np.arange(n - 1)
    ...     sparsity[i, i + 1] = 1
    ...     return sparsity
    ...
    >>> n = 100000
    >>> x0_broyden = -np.ones(n)
    ...
    >>> res_3 = least_squares(fun_broyden, x0_broyden,
    ...                       jac_sparsity=sparsity_broyden(n))
    >>> res_3.cost
    4.5687069299604613e-23
    >>> res_3.optimality
    1.1650454296851518e-11

    Let's also solve a curve fitting problem using a robust loss function to
    take care of outliers in the data. Define the model function as
    ``y = a + b * exp(c * t)``, where t is a predictor variable, y is an
    observation and a, b, c are parameters to estimate.

    First, define the function which generates the data with noise and
    outliers, define the model parameters, and generate data:

    >>> from numpy.random import default_rng
    >>> rng = default_rng()
    >>> def gen_data(t, a, b, c, noise=0., n_outliers=0, seed=None):
    ...     rng = default_rng(seed)
    ...
    ...     y = a + b * np.exp(t * c)
    ...
    ...     error = noise * rng.standard_normal(t.size)
    ...     outliers = rng.integers(0, t.size, n_outliers)
    ...     error[outliers] *= 10
    ...
    ...     return y + error
    ...
    >>> a = 0.5
    >>> b = 2.0
    >>> c = -1
    >>> t_min = 0
    >>> t_max = 10
    >>> n_points = 15
    ...
    >>> t_train = np.linspace(t_min, t_max, n_points)
    >>> y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

    Define the function for computing residuals and the initial estimate of
    parameters.

    >>> def fun(x, t, y):
    ...     return x[0] + x[1] * np.exp(x[2] * t) - y
    ...
    >>> x0 = np.array([1.0, 1.0, 0.0])

    Compute a standard least-squares solution:

    >>> res_lsq = least_squares(fun, x0, args=(t_train, y_train))

    Now compute two solutions with two different robust loss functions. The
    parameter `f_scale` is set to 0.1, meaning that inlier residuals should
    not significantly exceed 0.1 (the noise level used).

    >>> res_soft_l1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1,
    ...                             args=(t_train, y_train))
    >>> res_log = least_squares(fun, x0, loss='cauchy', f_scale=0.1,
    ...                         args=(t_train, y_train))

    And, finally, plot all the curves. We see that by selecting an appropriate
    `loss` we can get estimates close to optimal even in the presence of
    strong outliers. But keep in mind that generally it is recommended to try
    'soft_l1' or 'huber' losses first (if at all necessary) as the other two
    options may cause difficulties in the optimization process.

    >>> t_test = np.linspace(t_min, t_max, n_points * 10)
    >>> y_true = gen_data(t_test, a, b, c)
    >>> y_lsq = gen_data(t_test, *res_lsq.x)
    >>> y_soft_l1 = gen_data(t_test, *res_soft_l1.x)
    >>> y_log = gen_data(t_test, *res_log.x)
    ...
    >>> import matplotlib.pyplot as plt
    >>> plt.plot(t_train, y_train, 'o')
    >>> plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
    >>> plt.plot(t_test, y_lsq, label='linear loss')
    >>> plt.plot(t_test, y_soft_l1, label='soft_l1 loss')
    >>> plt.plot(t_test, y_log, label='cauchy loss')
    >>> plt.xlabel("t")
    >>> plt.ylabel("y")
    >>> plt.legend()
    >>> plt.show()

    In the next example, we show how complex-valued residual functions of
    complex variables can be optimized with ``least_squares()``. Consider the
    following function:

    >>> def f(z):
    ...     return z - (0.5 + 0.5j)

    We wrap it into a function of real variables that returns real residuals
    by simply handling the real and imaginary parts as independent variables:

    >>> def f_wrap(x):
    ...     fx = f(x[0] + 1j*x[1])
    ...     return np.array([fx.real, fx.imag])

    Thus, instead of the original m-D complex function of n complex
    variables we optimize a 2m-D real function of 2n real variables:

    >>> from scipy.optimize import least_squares
    >>> res_wrapped = least_squares(f_wrap, (0.1, 0.1), bounds=([0, 0], [1, 1]))
    >>> z = res_wrapped.x[0] + res_wrapped.x[1]*1j
    >>> z
    (0.49999999999925893+0.49999999999925893j)

    """
    if method not in ['trf', 'dogbox', 'lm']:
        raise ValueError("`method` must be 'trf', 'dogbox' or 'lm'.")

    if jac not in ['2-point', '3-point', 'cs'] and not callable(jac):
        raise ValueError("`jac` must be '2-point', '3-point', 'cs' or "
                         "callable.")

    if tr_solver not in [None, 'exact', 'lsmr']:
        raise ValueError("`tr_solver` must be None, 'exact' or 'lsmr'.")

    if loss not in IMPLEMENTED_LOSSES and not callable(loss):
        raise ValueError("`loss` must be one of {0} or a callable."
                         .format(IMPLEMENTED_LOSSES.keys()))

    if method == 'lm' and loss != 'linear':
        raise ValueError("method='lm' supports only 'linear' loss function.")

    if verbose not in [0, 1, 2]:
        raise ValueError("`verbose` must be in [0, 1, 2].")

    if max_nfev is not None and max_nfev <= 0:
        raise ValueError("`max_nfev` must be None or positive integer.")

    if np.iscomplexobj(x0):
        raise ValueError("`x0` must be real.")

    x0 = np.atleast_1d(x0).astype(float)

    if x0.ndim > 1:
        raise ValueError("`x0` must have at most 1 dimension.")

    if isinstance(bounds, Bounds):
        lb, ub = bounds.lb, bounds.ub
        bounds = (lb, ub)
    else:
        if len(bounds) == 2:
            lb, ub = prepare_bounds(bounds, x0.shape[0])
        else:
            raise ValueError("`bounds` must contain 2 elements.")

    if method == 'lm' and not np.all((lb == -np.inf) & (ub == np.inf)):
        raise ValueError("Method 'lm' doesn't support bounds.")

    if lb.shape != x0.shape or ub.shape != x0.shape:
        raise ValueError("Inconsistent shapes between bounds and `x0`.")

    if np.any(lb >= ub):
        raise ValueError("Each lower bound must be strictly less than each "
                         "upper bound.")

    if not in_bounds(x0, lb, ub):
        raise ValueError("`x0` is infeasible.")

    x_scale = check_x_scale(x_scale, x0)

    ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method)

    def fun_wrapped(x):
        return np.atleast_1d(fun(x, *args, **kwargs))

    if method == 'trf':
        x0 = make_strictly_feasible(x0, lb, ub)
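        # 'trf' keeps its iterates strictly inside the bounds, so nudge x0
        # away from any bound it happens to lie on.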

    f0 = fun_wrapped(x0)

    if f0.ndim != 1:
        raise ValueError("`fun` must return at most 1-d array_like. "
                         "f0.shape: {0}".format(f0.shape))

    if not np.all(np.isfinite(f0)):
        raise ValueError("Residuals are not finite in the initial point.")

    n = x0.size
    m = f0.size

    if method == 'lm' and m < n:
        raise ValueError("Method 'lm' doesn't work when the number of "
                         "residuals is less than the number of variables.")

    loss_function = construct_loss_function(m, loss, f_scale)
    if callable(loss):
        rho = loss_function(f0)
        if rho.shape != (3, m):
            raise ValueError("The return value of `loss` callable has wrong "
                             "shape.")
        initial_cost = 0.5 * np.sum(rho[0])
    elif loss_function is not None:
        initial_cost = loss_function(f0, cost_only=True)
    else:
        initial_cost = 0.5 * np.dot(f0, f0)

    if callable(jac):
        J0 = jac(x0, *args, **kwargs)

        if issparse(J0):
            J0 = J0.tocsr()

            def jac_wrapped(x, _=None):
                return jac(x, *args, **kwargs).tocsr()

        elif isinstance(J0, LinearOperator):
            def jac_wrapped(x, _=None):
                return jac(x, *args, **kwargs)

        else:
            J0 = np.atleast_2d(J0)

            def jac_wrapped(x, _=None):
                return np.atleast_2d(jac(x, *args, **kwargs))

    else:  # Estimate Jacobian by finite differences.
        if method == 'lm':
            if jac_sparsity is not None:
                raise ValueError("method='lm' does not support "
                                 "`jac_sparsity`.")

            if jac != '2-point':
                warn("jac='{0}' works equivalently to '2-point' "
                     "for method='lm'.".format(jac))

            J0 = jac_wrapped = None
        else:
            if jac_sparsity is not None and tr_solver == 'exact':
                raise ValueError("tr_solver='exact' is incompatible "
                                 "with `jac_sparsity`.")

            jac_sparsity = check_jac_sparsity(jac_sparsity, m, n)

            def jac_wrapped(x, f):
                J = approx_derivative(fun, x, rel_step=diff_step, method=jac,
                                      f0=f, bounds=bounds, args=args,
                                      kwargs=kwargs, sparsity=jac_sparsity)
                if J.ndim != 2:  # J is guaranteed not sparse.
                    J = np.atleast_2d(J)

                return J

            J0 = jac_wrapped(x0, f0)
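            # Passing f0 lets approx_derivative reuse the residuals already
            # evaluated at x0, saving one function evaluation.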

    if J0 is not None:
        if J0.shape != (m, n):
            raise ValueError(
                "The return value of `jac` has wrong shape: expected {0}, "
                "actual {1}.".format((m, n), J0.shape))

        if not isinstance(J0, np.ndarray):
            if method == 'lm':
                raise ValueError("method='lm' works only with dense "
                                 "Jacobian matrices.")

            if tr_solver == 'exact':
                raise ValueError(
                    "tr_solver='exact' works only with dense "
                    "Jacobian matrices.")

        jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
        if isinstance(J0, LinearOperator) and jac_scale:
            raise ValueError("x_scale='jac' can't be used when `jac` "
                             "returns LinearOperator.")

        if tr_solver is None:
            if isinstance(J0, np.ndarray):
                tr_solver = 'exact'
            else:
                tr_solver = 'lsmr'
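        # Dense Jacobians default to the 'exact' subproblem solver; sparse
        # matrices and LinearOperators fall back to 'lsmr', which needs only
        # matrix-vector products.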

    if method == 'lm':
        result = call_minpack(fun_wrapped, x0, jac_wrapped, ftol, xtol, gtol,
                              max_nfev, x_scale, diff_step)

    elif method == 'trf':
        result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol,
                     gtol, max_nfev, x_scale, loss_function, tr_solver,
                     tr_options.copy(), verbose)

    elif method == 'dogbox':
        if tr_solver == 'lsmr' and 'regularize' in tr_options:
            warn("The keyword 'regularize' in `tr_options` is not relevant "
                 "for 'dogbox' method.")
            tr_options = tr_options.copy()
            del tr_options['regularize']

        result = dogbox(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol,
                        xtol, gtol, max_nfev, x_scale, loss_function,
                        tr_solver, tr_options, verbose)

    result.message = TERMINATION_MESSAGES[result.status]
    result.success = result.status > 0
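    # status > 0 means one of the ftol/xtol/gtol criteria triggered; 0 means
    # `max_nfev` was exhausted and -1 indicates invalid input to MINPACK.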

    if verbose >= 1:
        print(result.message)
        print("Function evaluations {0}, initial cost {1:.4e}, final cost "
              "{2:.4e}, first-order optimality {3:.2e}."
              .format(result.nfev, initial_cost, result.cost,
                      result.optimality))

    return result