1""" 

2Functions 

3--------- 

4.. autosummary:: 

5 :toctree: generated/ 

6 

7 line_search_armijo 

8 line_search_wolfe1 

9 line_search_wolfe2 

10 scalar_search_wolfe1 

11 scalar_search_wolfe2 

12 

13""" 

from warnings import warn

from scipy.optimize import _minpack2 as minpack2
import numpy as np

__all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
           'scalar_search_wolfe1', 'scalar_search_wolfe2',
           'line_search_armijo']


class LineSearchWarning(RuntimeWarning):
    pass


#------------------------------------------------------------------------------
# Minpack's Wolfe line and scalar searches
#------------------------------------------------------------------------------

def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
                       old_fval=None, old_old_fval=None,
                       args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
                       xtol=1e-14):
    """
    As `scalar_search_wolfe1` but do a line search along direction `pk`

    Parameters
    ----------
    f : callable
        Function `f(x)`
    fprime : callable
        Gradient of `f`
    xk : array_like
        Current point
    pk : array_like
        Search direction
    gfk : array_like, optional
        Gradient of `f` at point `xk`
    old_fval : float, optional
        Value of `f` at point `xk`
    old_old_fval : float, optional
        Value of `f` at the point preceding `xk`

    The rest of the parameters are the same as for `scalar_search_wolfe1`.

    Returns
    -------
    stp : float or None
        Step size, or None if the line search did not converge.
    f_count : int
        Number of function evaluations.
    g_count : int
        Number of gradient evaluations.
    fval : float
        Value of `f` at the final point.
    old_fval : float
        Value of `f` at the starting point.
    gval : ndarray
        Gradient of `f` at the final point.

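    Examples
    --------
    A minimal, illustrative sketch on the quadratic bowl
    ``f(x) = x[0]**2 + x[1]**2``, searching along the steepest-descent
    direction (the exact step returned is up to the DCSRCH routine, so
    only success is checked):

    >>> import numpy as np
    >>> f = lambda x: x[0]**2 + x[1]**2
    >>> fprime = lambda x: np.array([2*x[0], 2*x[1]])
    >>> xk = np.array([1.0, 1.0])
    >>> stp, fc, gc, fval, old_fval, gval = line_search_wolfe1(
    ...     f, fprime, xk, -fprime(xk))
    >>> stp is not None
    True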

65 """ 

66 if gfk is None: 

67 gfk = fprime(xk, *args) 

68 

69 gval = [gfk] 

70 gc = [0] 

71 fc = [0] 

72 

73 def phi(s): 

74 fc[0] += 1 

75 return f(xk + s*pk, *args) 

76 

77 def derphi(s): 

78 gval[0] = fprime(xk + s*pk, *args) 

79 gc[0] += 1 

80 return np.dot(gval[0], pk) 

81 

82 derphi0 = np.dot(gfk, pk) 

83 

84 stp, fval, old_fval = scalar_search_wolfe1( 

85 phi, derphi, old_fval, old_old_fval, derphi0, 

86 c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol) 

87 

88 return stp, fc[0], gc[0], fval, old_fval, gval[0] 

89 

90 

def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9,
                         amax=50, amin=1e-8, xtol=1e-14):
    """
    Scalar function search for alpha that satisfies strong Wolfe conditions

    The direction of increasing ``alpha`` is assumed to be a descent
    direction (i.e., ``derphi0 < 0``).

    Parameters
    ----------
    phi : callable phi(alpha)
        Function at point `alpha`
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0
    old_phi0 : float, optional
        Value of phi at previous point
    derphi0 : float, optional
        Value of derphi at 0
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax, amin : float, optional
        Maximum and minimum step size
    xtol : float, optional
        Relative tolerance for an acceptable step.

    Returns
    -------
    alpha : float or None
        Step size, or None if no suitable step was found
    phi : float
        Value of `phi` at the new point `alpha`
    phi0 : float
        Value of `phi` at `alpha=0`

    Notes
    -----
    Uses routine DCSRCH from MINPACK.

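    Examples
    --------
    An illustrative run on ``phi(alpha) = (alpha - 1)**2``, whose
    minimizer ``alpha = 1`` satisfies the strong Wolfe conditions (the
    exact value returned is up to DCSRCH, so it is checked only
    approximately):

    >>> phi = lambda alpha: (alpha - 1)**2
    >>> derphi = lambda alpha: 2*(alpha - 1)
    >>> alpha, phi_a, phi0 = scalar_search_wolfe1(phi, derphi)
    >>> abs(alpha - 1.0) < 1e-8
    True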

133 """ 

134 

135 if phi0 is None: 

136 phi0 = phi(0.) 

137 if derphi0 is None: 

138 derphi0 = derphi(0.) 

139 

140 if old_phi0 is not None and derphi0 != 0: 

141 alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0) 

142 if alpha1 < 0: 

143 alpha1 = 1.0 

144 else: 

145 alpha1 = 1.0 

146 

147 phi1 = phi0 

148 derphi1 = derphi0 

149 isave = np.zeros((2,), np.intc) 

150 dsave = np.zeros((13,), float) 

151 task = b'START' 

152 

153 maxiter = 100 

154 for i in range(maxiter): 

155 stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1, 

156 c1, c2, xtol, task, 

157 amin, amax, isave, dsave) 

158 if task[:2] == b'FG': 

159 alpha1 = stp 

160 phi1 = phi(stp) 

161 derphi1 = derphi(stp) 

162 else: 

163 break 

164 else: 

165 # maxiter reached, the line search did not converge 

166 stp = None 

167 

168 if task[:5] == b'ERROR' or task[:4] == b'WARN': 

169 stp = None # failed 

170 

171 return stp, phi1, phi0 

172 

173 

line_search = line_search_wolfe1


#------------------------------------------------------------------------------
# Pure-Python Wolfe line and scalar searches
#------------------------------------------------------------------------------

def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
                       old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
                       extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    Parameters
    ----------
    f : callable f(x,*args)
        Objective function.
    myfprime : callable f'(x,*args)
        Objective function gradient.
    xk : ndarray
        Starting point.
    pk : ndarray
        Search direction.
    gfk : ndarray, optional
        Gradient value for x=xk (xk being the current parameter
        estimate). Will be recomputed if omitted.
    old_fval : float, optional
        Function value for x=xk. Will be recomputed if omitted.
    old_old_fval : float, optional
        Function value for the point preceding x=xk.
    args : tuple, optional
        Additional arguments passed to objective function.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, x, f, g)``
        returning a boolean. Arguments are the proposed step ``alpha``
        and the corresponding ``x``, ``f`` and ``g`` values. The line
        search accepts the value of ``alpha`` only if this callable
        returns ``True``. If the callable returns ``False`` for the
        step length, the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying the
        strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha : float or None
        Alpha for which ``x_new = x0 + alpha * pk``,
        or None if the line search algorithm did not converge.
    fc : int
        Number of function evaluations made.
    gc : int
        Number of gradient evaluations made.
    new_fval : float or None
        New function value ``f(x_new)=f(x0+alpha*pk)``,
        or None if the line search algorithm did not converge.
    old_fval : float
        Old function value ``f(x0)``.
    new_slope : float or None
        The local slope along the search direction at the
        new value ``<myfprime(x_new), pk>``,
        or None if the line search algorithm did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.optimize import line_search

    An objective function and its gradient are defined.

    >>> def obj_func(x):
    ...     return (x[0])**2 + (x[1])**2
    >>> def obj_grad(x):
    ...     return [2*x[0], 2*x[1]]

    We can find alpha that satisfies strong Wolfe conditions.

    >>> start_point = np.array([1.8, 1.7])
    >>> search_gradient = np.array([-1.0, -1.0])
    >>> line_search(obj_func, obj_grad, start_point, search_gradient)
    (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])

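    The ``extra_condition`` hook can impose additional acceptance tests
    on a step that already satisfies the strong Wolfe conditions; for
    instance (an illustrative sketch reusing the objects above;
    ``small_step`` is a made-up predicate):

    >>> def small_step(alpha, x, f, g):
    ...     return alpha <= 10.0
    >>> res = line_search_wolfe2(obj_func, obj_grad, start_point,
    ...                          search_gradient,
    ...                          extra_condition=small_step)
    >>> res[0]
    1.0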

268 """ 

269 fc = [0] 

270 gc = [0] 

271 gval = [None] 

272 gval_alpha = [None] 

273 

274 def phi(alpha): 

275 fc[0] += 1 

276 return f(xk + alpha * pk, *args) 

277 

278 fprime = myfprime 

279 

280 def derphi(alpha): 

281 gc[0] += 1 

282 gval[0] = fprime(xk + alpha * pk, *args) # store for later use 

283 gval_alpha[0] = alpha 

284 return np.dot(gval[0], pk) 

285 

286 if gfk is None: 

287 gfk = fprime(xk, *args) 

288 derphi0 = np.dot(gfk, pk) 

289 

290 if extra_condition is not None: 

291 # Add the current gradient as argument, to avoid needless 

292 # re-evaluation 

293 def extra_condition2(alpha, phi): 

294 if gval_alpha[0] != alpha: 

295 derphi(alpha) 

296 x = xk + alpha * pk 

297 return extra_condition(alpha, x, phi, gval[0]) 

298 else: 

299 extra_condition2 = None 

300 

301 alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2( 

302 phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax, 

303 extra_condition2, maxiter=maxiter) 

304 

305 if derphi_star is None: 

306 warn('The line search algorithm did not converge', LineSearchWarning) 

307 else: 

308 # derphi_star is a number (derphi) -- so use the most recently 

309 # calculated gradient used in computing it derphi = gfk*pk 

310 # this is the gradient at the next step no need to compute it 

311 # again in the outer loop. 

312 derphi_star = gval[0] 

313 

314 return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star 

315 

316 

def scalar_search_wolfe2(phi, derphi, phi0=None,
                         old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9, amax=None,
                         extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    The direction of increasing ``alpha`` is assumed to be a descent
    direction (i.e., ``derphi0 < 0``).

    Parameters
    ----------
    phi : callable phi(alpha)
        Objective scalar function.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at previous point.
    derphi0 : float, optional
        Value of derphi at 0.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, phi_value)``
        returning a boolean. The line search accepts the value
        of ``alpha`` only if this callable returns ``True``.
        If the callable returns ``False`` for the step length,
        the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying
        the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha_star : float or None
        Best alpha, or None if the line search algorithm did not converge.
    phi_star : float
        phi at alpha_star.
    phi0 : float
        phi at 0.
    derphi_star : float or None
        derphi at alpha_star, or None if the line search algorithm
        did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

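    Examples
    --------
    An illustrative run on ``phi(alpha) = (alpha - 1)**2`` (a hedged
    sketch; only the strong Wolfe curvature condition itself is checked,
    since the accepted step depends on the bracketing path):

    >>> phi = lambda alpha: (alpha - 1)**2
    >>> derphi = lambda alpha: 2*(alpha - 1)
    >>> alpha, phi_a, phi0, derphi_a = scalar_search_wolfe2(phi, derphi)
    >>> alpha is not None and abs(derphi(alpha)) <= -0.9 * derphi(0)
    True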

372 """ 

373 

374 if phi0 is None: 

375 phi0 = phi(0.) 

376 

377 if derphi0 is None: 

378 derphi0 = derphi(0.) 

379 

380 alpha0 = 0 

381 if old_phi0 is not None and derphi0 != 0: 

382 alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0) 

383 else: 

384 alpha1 = 1.0 

385 

386 if alpha1 < 0: 

387 alpha1 = 1.0 

388 

389 if amax is not None: 

390 alpha1 = min(alpha1, amax) 

391 

392 phi_a1 = phi(alpha1) 

393 #derphi_a1 = derphi(alpha1) evaluated below 

394 

395 phi_a0 = phi0 

396 derphi_a0 = derphi0 

397 

398 if extra_condition is None: 

399 extra_condition = lambda alpha, phi: True 

400 

401 for i in range(maxiter): 

402 if alpha1 == 0 or (amax is not None and alpha0 == amax): 

403 # alpha1 == 0: This shouldn't happen. Perhaps the increment has 

404 # slipped below machine precision? 

405 alpha_star = None 

406 phi_star = phi0 

407 phi0 = old_phi0 

408 derphi_star = None 

409 

410 if alpha1 == 0: 

411 msg = 'Rounding errors prevent the line search from converging' 

412 else: 

413 msg = "The line search algorithm could not find a solution " + \ 

414 "less than or equal to amax: %s" % amax 

415 

416 warn(msg, LineSearchWarning) 

417 break 

418 

419 not_first_iteration = i > 0 

420 if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \ 

421 ((phi_a1 >= phi_a0) and not_first_iteration): 

422 alpha_star, phi_star, derphi_star = \ 

423 _zoom(alpha0, alpha1, phi_a0, 

424 phi_a1, derphi_a0, phi, derphi, 

425 phi0, derphi0, c1, c2, extra_condition) 

426 break 

427 

428 derphi_a1 = derphi(alpha1) 

429 if (abs(derphi_a1) <= -c2*derphi0): 

430 if extra_condition(alpha1, phi_a1): 

431 alpha_star = alpha1 

432 phi_star = phi_a1 

433 derphi_star = derphi_a1 

434 break 

435 

436 if (derphi_a1 >= 0): 

437 alpha_star, phi_star, derphi_star = \ 

438 _zoom(alpha1, alpha0, phi_a1, 

439 phi_a0, derphi_a1, phi, derphi, 

440 phi0, derphi0, c1, c2, extra_condition) 

441 break 

442 

443 alpha2 = 2 * alpha1 # increase by factor of two on each iteration 

444 if amax is not None: 

445 alpha2 = min(alpha2, amax) 

446 alpha0 = alpha1 

447 alpha1 = alpha2 

448 phi_a0 = phi_a1 

449 phi_a1 = phi(alpha1) 

450 derphi_a0 = derphi_a1 

451 

452 else: 

453 # stopping test maxiter reached 

454 alpha_star = alpha1 

455 phi_star = phi_a1 

456 derphi_star = None 

457 warn('The line search algorithm did not converge', LineSearchWarning) 

458 

459 return alpha_star, phi_star, phi0, derphi_star 

460 

461 

def _cubicmin(a, fa, fpa, b, fb, c, fc):
    """
    Finds the minimizer for a cubic polynomial that goes through the
    points (a, fa), (b, fb), and (c, fc) with derivative at a of fpa.

    If no minimizer can be found, return None.

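    Examples
    --------
    Fitted through three points of the cubic
    ``f(x) = x**3 - 3*x**2 + 4`` (with the derivative supplied at the
    first point), the interpolant is the cubic itself, so its local
    minimizer ``x = 2`` is recovered exactly (an illustrative check of
    this private helper, not part of the public API):

    >>> import numpy as np
    >>> f = lambda x: x**3 - 3*x**2 + 4
    >>> fp = lambda x: 3*x**2 - 6*x
    >>> xmin = _cubicmin(0.0, f(0.0), fp(0.0), 1.0, f(1.0), 3.0, f(3.0))
    >>> np.allclose(xmin, 2.0)
    True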

469 """ 

470 # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D 

471 

472 with np.errstate(divide='raise', over='raise', invalid='raise'): 

473 try: 

474 C = fpa 

475 db = b - a 

476 dc = c - a 

477 denom = (db * dc) ** 2 * (db - dc) 

478 d1 = np.empty((2, 2)) 

479 d1[0, 0] = dc ** 2 

480 d1[0, 1] = -db ** 2 

481 d1[1, 0] = -dc ** 3 

482 d1[1, 1] = db ** 3 

483 [A, B] = np.dot(d1, np.asarray([fb - fa - C * db, 

484 fc - fa - C * dc]).flatten()) 

485 A /= denom 

486 B /= denom 

487 radical = B * B - 3 * A * C 

488 xmin = a + (-B + np.sqrt(radical)) / (3 * A) 

489 except ArithmeticError: 

490 return None 

491 if not np.isfinite(xmin): 

492 return None 

493 return xmin 

494 

495 

def _quadmin(a, fa, fpa, b, fb):
    """
    Finds the minimizer for a quadratic polynomial that goes through
    the points (a, fa), (b, fb) with derivative at a of fpa.

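    Examples
    --------
    For the parabola ``f(x) = (x - 3)**2`` sampled at two points, with
    the derivative ``f'(0) = -6`` given at the first, the interpolant is
    exact and the minimizer ``x = 3`` is recovered (an illustrative
    check of this private helper):

    >>> import numpy as np
    >>> f = lambda x: (x - 3)**2
    >>> xmin = _quadmin(0.0, f(0.0), -6.0, 1.0, f(1.0))
    >>> np.allclose(xmin, 3.0)
    True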

501 """ 

502 # f(x) = B*(x-a)^2 + C*(x-a) + D 

503 with np.errstate(divide='raise', over='raise', invalid='raise'): 

504 try: 

505 D = fa 

506 C = fpa 

507 db = b - a * 1.0 

508 B = (fb - D - C * db) / (db * db) 

509 xmin = a - C / (2.0 * B) 

510 except ArithmeticError: 

511 return None 

512 if not np.isfinite(xmin): 

513 return None 

514 return xmin 

515 

516 

def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2, extra_condition):
    """Zoom stage of approximate line search satisfying strong Wolfe
    conditions.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    Notes
    -----
    Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
    'Numerical Optimization', 1999, pp. 61.

    """
    maxiter = 10
    i = 0
    delta1 = 0.2  # cubic interpolant check
    delta2 = 0.1  # quadratic interpolant check
    phi_rec = phi0
    a_rec = 0
    while True:
        # Interpolate to find a trial step length between a_lo and a_hi.
        # We need to choose an interpolation here. Use cubic interpolation;
        # if the result is within delta * dalpha of an endpoint, or outside
        # of the interval bounded by a_lo and a_hi, then use quadratic
        # interpolation; if the result is still too close, use bisection.

        dalpha = a_hi - a_lo
        if dalpha < 0:
            a, b = a_hi, a_lo
        else:
            a, b = a_lo, a_hi

        # minimizer of cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
        #
        # if the result is too close to the end points (or out of the
        # interval), then use quadratic interpolation with phi_lo,
        # derphi_lo and phi_hi; if the result is still too close to the
        # end points (or out of the interval), then use bisection

        if (i > 0):
            cchk = delta1 * dalpha
            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
                            a_rec, phi_rec)
        if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
            qchk = delta2 * dalpha
            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
            if (a_j is None) or (a_j > b - qchk) or (a_j < a + qchk):
                a_j = a_lo + 0.5*dalpha

        # Check new value of a_j

        phi_aj = phi(a_j)
        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
            phi_rec = phi_hi
            a_rec = a_hi
            a_hi = a_j
            phi_hi = phi_aj
        else:
            derphi_aj = derphi(a_j)
            if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
                a_star = a_j
                val_star = phi_aj
                valprime_star = derphi_aj
                break
            if derphi_aj*(a_hi - a_lo) >= 0:
                phi_rec = phi_hi
                a_rec = a_hi
                a_hi = a_lo
                phi_hi = phi_lo
            else:
                phi_rec = phi_lo
                a_rec = a_lo
            a_lo = a_j
            phi_lo = phi_aj
            derphi_lo = derphi_aj
        i += 1
        if (i > maxiter):
            # Failed to find a conforming step size
            a_star = None
            val_star = None
            valprime_star = None
            break
    return a_star, val_star, valprime_star


#------------------------------------------------------------------------------
# Armijo line and scalar searches
#------------------------------------------------------------------------------

def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize over alpha, the function ``f(xk + alpha*pk)``.

    Parameters
    ----------
    f : callable
        Function to be minimized.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like
        Gradient of `f` at point `xk`.
    old_fval : float
        Value of `f` at point `xk`.
    args : tuple, optional
        Optional arguments.
    c1 : float, optional
        Value to control stopping criterion.
    alpha0 : scalar, optional
        Value of `alpha` at start of the optimization.

    Returns
    -------
    alpha : float or None
        Step size, or None if no suitable step was found.
    f_count : int
        Number of function evaluations.
    f_val_at_alpha : float
        Function value at the final point.

    Notes
    -----
    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.

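    Examples
    --------
    An illustrative call on the quadratic bowl
    ``f(x) = x[0]**2 + x[1]**2``, stepping against the gradient:

    >>> import numpy as np
    >>> f = lambda x: x[0]**2 + x[1]**2
    >>> xk = np.array([1.0, 1.0])
    >>> gfk = np.array([2.0, 2.0])
    >>> alpha, fc, fval = line_search_armijo(f, xk, -gfk, gfk, f(xk))
    >>> alpha is not None
    True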

641 """ 

642 xk = np.atleast_1d(xk) 

643 fc = [0] 

644 

645 def phi(alpha1): 

646 fc[0] += 1 

647 return f(xk + alpha1*pk, *args) 

648 

649 if old_fval is None: 

650 phi0 = phi(0.) 

651 else: 

652 phi0 = old_fval # compute f(xk) -- done in past loop 

653 

654 derphi0 = np.dot(gfk, pk) 

655 alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1, 

656 alpha0=alpha0) 

657 return alpha, fc[0], phi1 

658 

659 

660def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1): 

661 """ 

662 Compatibility wrapper for `line_search_armijo` 

663 """ 

664 r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1, 

665 alpha0=alpha0) 

666 return r[0], r[1], 0, r[2] 

667 

668 

def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
    """Minimize over alpha, the function ``phi(alpha)``.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.

    The direction of increasing ``alpha`` is assumed to be a descent
    direction (i.e., ``derphi0 < 0``).

    Returns
    -------
    alpha : float or None
        Step size, or None if no suitable step was found.
    phi1 : float
        Value of ``phi`` at the final point.

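    Examples
    --------
    For ``phi(alpha) = (alpha - 1)**2`` with ``phi0 = phi(0) = 1`` and
    ``derphi0 = -2``, the initial step ``alpha0 = 1`` already satisfies
    the Armijo condition and is returned directly:

    >>> phi = lambda alpha: (alpha - 1)**2
    >>> alpha, phi1 = scalar_search_armijo(phi, 1.0, -2.0)
    >>> alpha
    1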

682 """ 

683 phi_a0 = phi(alpha0) 

684 if phi_a0 <= phi0 + c1*alpha0*derphi0: 

685 return alpha0, phi_a0 

686 

687 # Otherwise, compute the minimizer of a quadratic interpolant: 

688 

689 alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0) 

690 phi_a1 = phi(alpha1) 

691 

692 if (phi_a1 <= phi0 + c1*alpha1*derphi0): 

693 return alpha1, phi_a1 

694 

695 # Otherwise, loop with cubic interpolation until we find an alpha which 

696 # satisfies the first Wolfe condition (since we are backtracking, we will 

697 # assume that the value of alpha is not too small and satisfies the second 

698 # condition. 

699 

700 while alpha1 > amin: # we are assuming alpha>0 is a descent direction 

701 factor = alpha0**2 * alpha1**2 * (alpha1-alpha0) 

702 a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \ 

703 alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0) 

704 a = a / factor 

705 b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \ 

706 alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0) 

707 b = b / factor 

708 

709 alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a) 

710 phi_a2 = phi(alpha2) 

711 

712 if (phi_a2 <= phi0 + c1*alpha2*derphi0): 

713 return alpha2, phi_a2 

714 

715 if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96: 

716 alpha2 = alpha1 / 2.0 

717 

718 alpha0 = alpha1 

719 alpha1 = alpha2 

720 phi_a0 = phi_a1 

721 phi_a1 = phi_a2 

722 

723 # Failed to find a suitable step length 

724 return None, phi_a1 

725 

726 

727#------------------------------------------------------------------------------ 

728# Non-monotone line search for DF-SANE 

729#------------------------------------------------------------------------------ 

730 

def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
                                  gamma=1e-4, tau_min=0.1, tau_max=0.5):
    """
    Nonmonotone backtracking line search as described in [1]_

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    prev_fs : list of float
        List of previous merit function values. Should have
        ``len(prev_fs) <= M`` where ``M`` is the nonmonotonicity
        window parameter.
    eta : float
        Allowed merit function increase, see [1]_
    gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position

    References
    ----------
    .. [1] "Spectral residual method without gradient information for solving
           large-scale nonlinear systems of equations." W. La Cruz,
           J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).

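    Examples
    --------
    A hedged sketch on the scalar residual ``F(x) = x`` with merit
    function ``f = F**2``, stepping from ``x_k = 1`` along ``d = -1``
    (this helper is private, so the call below is purely illustrative):

    >>> import numpy as np
    >>> def f(x):
    ...     F = x
    ...     return float(F**2), F
    >>> alpha, xp, fp, Fp = _nonmonotone_line_search_cruz(
    ...     f, np.float64(1.0), np.float64(-1.0), [1.0], eta=1e-3)
    >>> alpha
    1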

770 """ 

771 f_k = prev_fs[-1] 

772 f_bar = max(prev_fs) 

773 

774 alpha_p = 1 

775 alpha_m = 1 

776 alpha = 1 

777 

778 while True: 

779 xp = x_k + alpha_p * d 

780 fp, Fp = f(xp) 

781 

782 if fp <= f_bar + eta - gamma * alpha_p**2 * f_k: 

783 alpha = alpha_p 

784 break 

785 

786 alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k) 

787 

788 xp = x_k - alpha_m * d 

789 fp, Fp = f(xp) 

790 

791 if fp <= f_bar + eta - gamma * alpha_m**2 * f_k: 

792 alpha = -alpha_m 

793 break 

794 

795 alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k) 

796 

797 alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p) 

798 alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m) 

799 

800 return alpha, xp, fp, Fp 

801 

802 

def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
                                   gamma=1e-4, tau_min=0.1, tau_max=0.5,
                                   nu=0.85):
    """
    Nonmonotone line search from [1]_

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    f_k : float
        Initial merit function value.
    C, Q : float
        Control parameters. On the first iteration, give values
        Q=1.0, C=f_k
    eta : float
        Allowed merit function increase, see [1]_
    nu, gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position
    C : float
        New value for the control parameter C
    Q : float
        New value for the control parameter Q

    References
    ----------
    .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
           search and its application to the spectral residual
           method'', IMA J. Numer. Anal. 29, 814 (2009).

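    Examples
    --------
    The same illustrative scalar setup as for
    `_nonmonotone_line_search_cruz`, with the recommended
    first-iteration control parameters ``Q=1.0`` and ``C=f_k``:

    >>> import numpy as np
    >>> def f(x):
    ...     F = x
    ...     return float(F**2), F
    >>> alpha, xp, fp, Fp, C, Q = _nonmonotone_line_search_cheng(
    ...     f, np.float64(1.0), np.float64(-1.0), 1.0, C=1.0, Q=1.0,
    ...     eta=1e-3)
    >>> alpha
    1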

849 """ 

850 alpha_p = 1 

851 alpha_m = 1 

852 alpha = 1 

853 

854 while True: 

855 xp = x_k + alpha_p * d 

856 fp, Fp = f(xp) 

857 

858 if fp <= C + eta - gamma * alpha_p**2 * f_k: 

859 alpha = alpha_p 

860 break 

861 

862 alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k) 

863 

864 xp = x_k - alpha_m * d 

865 fp, Fp = f(xp) 

866 

867 if fp <= C + eta - gamma * alpha_m**2 * f_k: 

868 alpha = -alpha_m 

869 break 

870 

871 alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k) 

872 

873 alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p) 

874 alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m) 

875 

876 # Update C and Q 

877 Q_next = nu * Q + 1 

878 C = (nu * Q * (C + eta) + fp) / Q_next 

879 Q = Q_next 

880 

881 return alpha, xp, fp, Fp, C, Q