1"""
2Functions
3---------
4.. autosummary::
5 :toctree: generated/
7 line_search_armijo
8 line_search_wolfe1
9 line_search_wolfe2
10 scalar_search_wolfe1
11 scalar_search_wolfe2
13"""
14from warnings import warn
16from scipy.optimize import _minpack2 as minpack2
17import numpy as np
19__all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
20 'scalar_search_wolfe1', 'scalar_search_wolfe2',
21 'line_search_armijo']
23class LineSearchWarning(RuntimeWarning):
24 pass
27#------------------------------------------------------------------------------
28# Minpack's Wolfe line and scalar searches
29#------------------------------------------------------------------------------
31def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
32 old_fval=None, old_old_fval=None,
33 args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
34 xtol=1e-14):
35 """
36 As `scalar_search_wolfe1` but do a line search to direction `pk`
38 Parameters
39 ----------
40 f : callable
41 Function `f(x)`
42 fprime : callable
43 Gradient of `f`
44 xk : array_like
45 Current point
46 pk : array_like
47 Search direction
49 gfk : array_like, optional
50 Gradient of `f` at point `xk`
51 old_fval : float, optional
52 Value of `f` at point `xk`
53 old_old_fval : float, optional
54 Value of `f` at point preceding `xk`
56 The rest of the parameters are the same as for `scalar_search_wolfe1`.
58 Returns
59 -------
60 stp, f_count, g_count, fval, old_fval
61 As in `line_search_wolfe1`
62 gval : array
63 Gradient of `f` at the final point
65 """
66 if gfk is None:
67 gfk = fprime(xk, *args)
69 gval = [gfk]
70 gc = [0]
71 fc = [0]
73 def phi(s):
74 fc[0] += 1
75 return f(xk + s*pk, *args)
77 def derphi(s):
78 gval[0] = fprime(xk + s*pk, *args)
79 gc[0] += 1
80 return np.dot(gval[0], pk)
82 derphi0 = np.dot(gfk, pk)
84 stp, fval, old_fval = scalar_search_wolfe1(
85 phi, derphi, old_fval, old_old_fval, derphi0,
86 c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)
88 return stp, fc[0], gc[0], fval, old_fval, gval[0]
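

# Illustrative usage sketch (example bindings, not part of the module): on the
# quadratic objective also used in the `line_search_wolfe2` docstring below,
# the unit step already satisfies both strong Wolfe conditions, so DCSRCH is
# expected to accept stp = 1.0:
#
#     f = lambda x: x[0]**2 + x[1]**2
#     fprime = lambda x: np.array([2*x[0], 2*x[1]])
#     xk = np.array([1.8, 1.7])
#     pk = np.array([-1.0, -1.0])
#     stp, fc, gc, fval, old_fval, gval = line_search_wolfe1(f, fprime, xk, pk)
#     # expected: stp == 1.0, fval == f(xk + stp*pk), gval == fprime(xk + stp*pk)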

def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9,
                         amax=50, amin=1e-8, xtol=1e-14):
    """
    Scalar function search for alpha that satisfies strong Wolfe conditions.

    The direction of increasing ``alpha`` (``alpha > 0``) is assumed to be a
    descent direction.

    Parameters
    ----------
    phi : callable phi(alpha)
        Function at point `alpha`.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at previous point.
    derphi0 : float, optional
        Value of derphi at 0.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax, amin : float, optional
        Maximum and minimum step size.
    xtol : float, optional
        Relative tolerance for an acceptable step.

    Returns
    -------
    alpha : float or None
        Step size, or None if no suitable step was found.
    phi : float
        Value of `phi` at the new point `alpha`.
    phi0 : float
        Value of `phi` at `alpha=0`.

    Notes
    -----
    Uses the DCSRCH routine from MINPACK-2.

    """
    if phi0 is None:
        phi0 = phi(0.)
    if derphi0 is None:
        derphi0 = derphi(0.)

    if old_phi0 is not None and derphi0 != 0:
        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
        if alpha1 < 0:
            alpha1 = 1.0
    else:
        alpha1 = 1.0

    phi1 = phi0
    derphi1 = derphi0
    isave = np.zeros((2,), np.intc)
    dsave = np.zeros((13,), float)
    task = b'START'

    maxiter = 100
    for i in range(maxiter):
        stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1,
                                                   c1, c2, xtol, task,
                                                   amin, amax, isave, dsave)
        if task[:2] == b'FG':
            alpha1 = stp
            phi1 = phi(stp)
            derphi1 = derphi(stp)
        else:
            break
    else:
        # maxiter reached, the line search did not converge
        stp = None

    if task[:5] == b'ERROR' or task[:4] == b'WARN':
        stp = None  # failed

    return stp, phi1, phi0
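

# Illustrative sketch of the reverse-communication protocol wrapped above
# (example bindings, not part of the module): `phi` and `derphi` are the 1-D
# restriction of the objective along the search direction. For the quadratic
# phi(a) = (a - 1)**2 the unit trial step is the exact minimizer, so DCSRCH
# is expected to report convergence immediately:
#
#     phi = lambda a: (a - 1)**2
#     derphi = lambda a: 2*(a - 1)
#     stp, phi1, phi0 = scalar_search_wolfe1(phi, derphi)
#     # expected: stp == 1.0, phi1 == 0.0, phi0 == 1.0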

line_search = line_search_wolfe1


#------------------------------------------------------------------------------
# Pure-Python Wolfe line and scalar searches
#------------------------------------------------------------------------------

def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
                       old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
                       extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    Parameters
    ----------
    f : callable f(x,*args)
        Objective function.
    myfprime : callable f'(x,*args)
        Objective function gradient.
    xk : ndarray
        Starting point.
    pk : ndarray
        Search direction.
    gfk : ndarray, optional
        Gradient value for x=xk (xk being the current parameter
        estimate). Will be recomputed if omitted.
    old_fval : float, optional
        Function value for x=xk. Will be recomputed if omitted.
    old_old_fval : float, optional
        Function value for the point preceding x=xk.
    args : tuple, optional
        Additional arguments passed to objective function.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, x, f, g)``
        returning a boolean. Arguments are the proposed step ``alpha``
        and the corresponding ``x``, ``f`` and ``g`` values. The line search
        accepts the value of ``alpha`` only if this
        callable returns ``True``. If the callable returns ``False``
        for the step length, the algorithm will continue with
        new iterates. The callable is only called for iterates
        satisfying the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha : float or None
        Alpha for which ``x_new = x0 + alpha * pk``,
        or None if the line search algorithm did not converge.
    fc : int
        Number of function evaluations made.
    gc : int
        Number of gradient evaluations made.
    new_fval : float or None
        New function value ``f(x_new)=f(x0+alpha*pk)``,
        or None if the line search algorithm did not converge.
    old_fval : float
        Old function value ``f(x0)``.
    new_slope : float or None
        The local slope along the search direction at the
        new value ``<myfprime(x_new), pk>``,
        or None if the line search algorithm did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.optimize import line_search

    An objective function and its gradient are defined.

    >>> def obj_func(x):
    ...     return (x[0])**2+(x[1])**2
    >>> def obj_grad(x):
    ...     return [2*x[0], 2*x[1]]

    We can find alpha that satisfies strong Wolfe conditions.

    >>> start_point = np.array([1.8, 1.7])
    >>> search_gradient = np.array([-1.0, -1.0])
    >>> line_search(obj_func, obj_grad, start_point, search_gradient)
    (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])

    """
    fc = [0]
    gc = [0]
    gval = [None]
    gval_alpha = [None]

    def phi(alpha):
        fc[0] += 1
        return f(xk + alpha * pk, *args)

    fprime = myfprime

    def derphi(alpha):
        gc[0] += 1
        gval[0] = fprime(xk + alpha * pk, *args)  # store for later use
        gval_alpha[0] = alpha
        return np.dot(gval[0], pk)

    if gfk is None:
        gfk = fprime(xk, *args)
    derphi0 = np.dot(gfk, pk)

    if extra_condition is not None:
        # Add the current gradient as argument, to avoid needless
        # re-evaluation
        def extra_condition2(alpha, phi):
            if gval_alpha[0] != alpha:
                derphi(alpha)
            x = xk + alpha * pk
            return extra_condition(alpha, x, phi, gval[0])
    else:
        extra_condition2 = None

    alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
            phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
            extra_condition2, maxiter=maxiter)

    if derphi_star is None:
        warn('The line search algorithm did not converge', LineSearchWarning)
    else:
        # derphi_star is the scalar directional derivative at alpha_star,
        # but the caller expects the gradient at the new point. The most
        # recently evaluated gradient, stored in gval[0], is the gradient
        # at xk + alpha_star*pk, so return it and avoid recomputing it in
        # the outer loop.
        derphi_star = gval[0]

    return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star
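

# Illustrative sketch of `extra_condition` (example bindings, not part of the
# module): restrict accepted iterates to a ball of radius 2 around the origin.
# For the docstring example above, the Wolfe step alpha = 1.0 gives
# x_new = [0.8, 0.7], which passes the extra test, so the result is unchanged:
#
#     extra = lambda alpha, x, f, g: np.linalg.norm(x) <= 2.0
#     result = line_search_wolfe2(lambda x: x[0]**2 + x[1]**2,
#                                 lambda x: np.array([2*x[0], 2*x[1]]),
#                                 np.array([1.8, 1.7]), np.array([-1.0, -1.0]),
#                                 extra_condition=extra)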

def scalar_search_wolfe2(phi, derphi, phi0=None,
                         old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9, amax=None,
                         extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    The direction of increasing ``alpha`` (``alpha > 0``) is assumed to be a
    descent direction.

    Parameters
    ----------
    phi : callable phi(alpha)
        Objective scalar function.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at previous point.
    derphi0 : float, optional
        Value of derphi at 0.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, phi_value)``
        returning a boolean. The line search accepts the value
        of ``alpha`` only if this callable returns ``True``.
        If the callable returns ``False`` for the step length,
        the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying
        the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha_star : float or None
        Best alpha, or None if the line search algorithm did not converge.
    phi_star : float
        phi at alpha_star.
    phi0 : float
        phi at 0.
    derphi_star : float or None
        derphi at alpha_star, or None if the line search algorithm
        did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.
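
    Examples
    --------
    A minimal illustration on the scalar quadratic ``phi(a) = (a - 1)**2``,
    whose minimizer ``a = 1`` satisfies the strong Wolfe conditions at the
    first trial step:

    >>> scalar_search_wolfe2(lambda a: (a - 1)**2, lambda a: 2*(a - 1))
    (1.0, 0.0, 1.0, 0.0)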
372 """
374 if phi0 is None:
375 phi0 = phi(0.)
377 if derphi0 is None:
378 derphi0 = derphi(0.)
380 alpha0 = 0
381 if old_phi0 is not None and derphi0 != 0:
382 alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
383 else:
384 alpha1 = 1.0
386 if alpha1 < 0:
387 alpha1 = 1.0
389 if amax is not None:
390 alpha1 = min(alpha1, amax)
392 phi_a1 = phi(alpha1)
393 #derphi_a1 = derphi(alpha1) evaluated below
395 phi_a0 = phi0
396 derphi_a0 = derphi0
398 if extra_condition is None:
399 extra_condition = lambda alpha, phi: True
401 for i in range(maxiter):
402 if alpha1 == 0 or (amax is not None and alpha0 == amax):
403 # alpha1 == 0: This shouldn't happen. Perhaps the increment has
404 # slipped below machine precision?
405 alpha_star = None
406 phi_star = phi0
407 phi0 = old_phi0
408 derphi_star = None
410 if alpha1 == 0:
411 msg = 'Rounding errors prevent the line search from converging'
412 else:
413 msg = "The line search algorithm could not find a solution " + \
414 "less than or equal to amax: %s" % amax
416 warn(msg, LineSearchWarning)
417 break
419 not_first_iteration = i > 0
420 if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
421 ((phi_a1 >= phi_a0) and not_first_iteration):
422 alpha_star, phi_star, derphi_star = \
423 _zoom(alpha0, alpha1, phi_a0,
424 phi_a1, derphi_a0, phi, derphi,
425 phi0, derphi0, c1, c2, extra_condition)
426 break
428 derphi_a1 = derphi(alpha1)
429 if (abs(derphi_a1) <= -c2*derphi0):
430 if extra_condition(alpha1, phi_a1):
431 alpha_star = alpha1
432 phi_star = phi_a1
433 derphi_star = derphi_a1
434 break
436 if (derphi_a1 >= 0):
437 alpha_star, phi_star, derphi_star = \
438 _zoom(alpha1, alpha0, phi_a1,
439 phi_a0, derphi_a1, phi, derphi,
440 phi0, derphi0, c1, c2, extra_condition)
441 break
443 alpha2 = 2 * alpha1 # increase by factor of two on each iteration
444 if amax is not None:
445 alpha2 = min(alpha2, amax)
446 alpha0 = alpha1
447 alpha1 = alpha2
448 phi_a0 = phi_a1
449 phi_a1 = phi(alpha1)
450 derphi_a0 = derphi_a1
452 else:
453 # stopping test maxiter reached
454 alpha_star = alpha1
455 phi_star = phi_a1
456 derphi_star = None
457 warn('The line search algorithm did not converge', LineSearchWarning)
459 return alpha_star, phi_star, phi0, derphi_star


def _cubicmin(a, fa, fpa, b, fb, c, fc):
    """
    Finds the minimizer for a cubic polynomial that goes through the
    points (a, fa), (b, fb), and (c, fc) with derivative at a of fpa.

    If no minimizer can be found, return None.
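
    Examples
    --------
    A minimal illustration: the cubic ``x**3 - 3*x`` has a local minimizer
    at ``x = 1``, and the interpolant through three of its points with the
    exact derivative at ``a = 0`` recovers it (``float`` is applied only to
    normalize the printed type):

    >>> float(_cubicmin(0., 0., -3., 2., 2., 3., 18.))
    1.0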
469 """
470 # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
472 with np.errstate(divide='raise', over='raise', invalid='raise'):
473 try:
474 C = fpa
475 db = b - a
476 dc = c - a
477 denom = (db * dc) ** 2 * (db - dc)
478 d1 = np.empty((2, 2))
479 d1[0, 0] = dc ** 2
480 d1[0, 1] = -db ** 2
481 d1[1, 0] = -dc ** 3
482 d1[1, 1] = db ** 3
483 [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
484 fc - fa - C * dc]).flatten())
485 A /= denom
486 B /= denom
487 radical = B * B - 3 * A * C
488 xmin = a + (-B + np.sqrt(radical)) / (3 * A)
489 except ArithmeticError:
490 return None
491 if not np.isfinite(xmin):
492 return None
493 return xmin


def _quadmin(a, fa, fpa, b, fb):
    """
    Finds the minimizer for a quadratic polynomial that goes through
    the points (a, fa), (b, fb) with derivative at a of fpa.
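
    Examples
    --------
    A minimal illustration: the parabola ``(x - 1)**2`` has its minimizer
    at ``x = 1``:

    >>> _quadmin(0., 1., -2., 2., 1.)
    1.0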
501 """
502 # f(x) = B*(x-a)^2 + C*(x-a) + D
503 with np.errstate(divide='raise', over='raise', invalid='raise'):
504 try:
505 D = fa
506 C = fpa
507 db = b - a * 1.0
508 B = (fb - D - C * db) / (db * db)
509 xmin = a - C / (2.0 * B)
510 except ArithmeticError:
511 return None
512 if not np.isfinite(xmin):
513 return None
514 return xmin


def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2, extra_condition):
    """Zoom stage of approximate linesearch satisfying strong Wolfe conditions.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    Notes
    -----
    Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
    'Numerical Optimization', 1999, pp. 61.

    """

    maxiter = 10
    i = 0
    delta1 = 0.2  # cubic interpolant check
    delta2 = 0.1  # quadratic interpolant check
    phi_rec = phi0
    a_rec = 0
    while True:
        # Interpolate to find a trial step length between a_lo and a_hi.
        # The interpolation method is chosen as follows: use cubic
        # interpolation first; if the result is within delta * dalpha of
        # the end points, or outside of the interval bounded by a_lo and
        # a_hi, fall back to quadratic interpolation; if that result is
        # still too close, bisect.

        dalpha = a_hi - a_lo
        if dalpha < 0:
            a, b = a_hi, a_lo
        else:
            a, b = a_lo, a_hi

        # minimizer of cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
        #
        # if the result is too close to the end points (or out of the
        # interval), then use quadratic interpolation with phi_lo,
        # derphi_lo and phi_hi; if the result is still too close to the
        # end points (or out of the interval), then use bisection

        if (i > 0):
            cchk = delta1 * dalpha
            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
                            a_rec, phi_rec)
        if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
            qchk = delta2 * dalpha
            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
            if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
                a_j = a_lo + 0.5*dalpha

        # Check new value of a_j

        phi_aj = phi(a_j)
        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
            phi_rec = phi_hi
            a_rec = a_hi
            a_hi = a_j
            phi_hi = phi_aj
        else:
            derphi_aj = derphi(a_j)
            if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
                a_star = a_j
                val_star = phi_aj
                valprime_star = derphi_aj
                break
            if derphi_aj*(a_hi - a_lo) >= 0:
                phi_rec = phi_hi
                a_rec = a_hi
                a_hi = a_lo
                phi_hi = phi_lo
            else:
                phi_rec = phi_lo
                a_rec = a_lo
            a_lo = a_j
            phi_lo = phi_aj
            derphi_lo = derphi_aj
        i += 1
        if (i > maxiter):
            # Failed to find a conforming step size
            a_star = None
            val_star = None
            valprime_star = None
            break
    return a_star, val_star, valprime_star
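

# Illustrative sketch (example bindings, not part of the module): zooming on
# the bracket [0, 2] for phi(a) = (a - 1)**2. The quadratic interpolant is
# exact here, so the first trial point a_j = 1.0 already satisfies the
# strong Wolfe conditions:
#
#     phi = lambda a: (a - 1)**2
#     derphi = lambda a: 2*(a - 1)
#     result = _zoom(0., 2., 1., 1., -2., phi, derphi,
#                    1., -2., 1e-4, 0.9, lambda alpha, p: True)
#     # expected: (1.0, 0.0, 0.0)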


#------------------------------------------------------------------------------
# Armijo line and scalar searches
#------------------------------------------------------------------------------

def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize over alpha, the function ``f(xk + alpha*pk)``.

    Parameters
    ----------
    f : callable
        Function to be minimized.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like
        Gradient of `f` at point `xk`.
    old_fval : float
        Value of `f` at point `xk`.
    args : tuple, optional
        Optional arguments.
    c1 : float, optional
        Value to control stopping criterion.
    alpha0 : scalar, optional
        Value of `alpha` at start of the optimization.

    Returns
    -------
    alpha : float or None
        Step size, or None if no suitable step was found.
    f_count : int
        Number of function evaluations made.
    f_val_at_alpha : float
        Value of `f` at the returned step.

    Notes
    -----
    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.

    """
    xk = np.atleast_1d(xk)
    fc = [0]

    def phi(alpha1):
        fc[0] += 1
        return f(xk + alpha1*pk, *args)

    if old_fval is None:
        phi0 = phi(0.)
    else:
        phi0 = old_fval  # compute f(xk) -- done in past loop

    derphi0 = np.dot(gfk, pk)
    alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1,
                                       alpha0=alpha0)
    return alpha, fc[0], phi1
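

# Illustrative sketch (example bindings, not part of the module): Armijo
# backtracking on f(x) = |x - 0.5|**2 starting from xk = [0.] along pk = [1.].
# The full step overshoots the minimizer, so one quadratic interpolation
# step backtracks to alpha = 0.5:
#
#     f = lambda x: np.dot(x - 0.5, x - 0.5)
#     alpha, fc, fval = line_search_armijo(f, np.array([0.0]), np.array([1.0]),
#                                          gfk=np.array([-1.0]), old_fval=0.25)
#     # expected: alpha == 0.5, fc == 2, fval == 0.0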


def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """
    Compatibility wrapper for `line_search_armijo`
    """
    r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1,
                           alpha0=alpha0)
    return r[0], r[1], 0, r[2]


def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
    """Minimize over alpha, the function ``phi(alpha)``.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.

    The direction of increasing ``alpha`` (``alpha > 0``) is assumed to be a
    descent direction.

    Returns
    -------
    alpha : float or None
        Step size, or None if no suitable step was found.
    phi1 : float
        Value of ``phi`` at the returned step.
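
    Examples
    --------
    A minimal illustration: backtracking on ``phi(alpha) = (alpha - 0.5)**2``
    from ``alpha0 = 1``. The full step fails the Armijo test and one
    quadratic interpolation step lands on the minimizer:

    >>> phi = lambda alpha: (alpha - 0.5)**2
    >>> scalar_search_armijo(phi, phi0=0.25, derphi0=-1.0)
    (0.5, 0.0)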
682 """
683 phi_a0 = phi(alpha0)
684 if phi_a0 <= phi0 + c1*alpha0*derphi0:
685 return alpha0, phi_a0
687 # Otherwise, compute the minimizer of a quadratic interpolant:
689 alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
690 phi_a1 = phi(alpha1)
692 if (phi_a1 <= phi0 + c1*alpha1*derphi0):
693 return alpha1, phi_a1
695 # Otherwise, loop with cubic interpolation until we find an alpha which
696 # satisfies the first Wolfe condition (since we are backtracking, we will
697 # assume that the value of alpha is not too small and satisfies the second
698 # condition.
700 while alpha1 > amin: # we are assuming alpha>0 is a descent direction
701 factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
702 a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
703 alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
704 a = a / factor
705 b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
706 alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
707 b = b / factor
709 alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
710 phi_a2 = phi(alpha2)
712 if (phi_a2 <= phi0 + c1*alpha2*derphi0):
713 return alpha2, phi_a2
715 if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
716 alpha2 = alpha1 / 2.0
718 alpha0 = alpha1
719 alpha1 = alpha2
720 phi_a0 = phi_a1
721 phi_a1 = phi_a2
723 # Failed to find a suitable step length
724 return None, phi_a1


#------------------------------------------------------------------------------
# Non-monotone line search for DF-SANE
#------------------------------------------------------------------------------

def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
                                  gamma=1e-4, tau_min=0.1, tau_max=0.5):
    """
    Nonmonotone backtracking line search as described in [1]_

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    prev_fs : list of float
        List of previous merit function values. Should have ``len(prev_fs) <= M``
        where ``M`` is the nonmonotonicity window parameter.
    eta : float
        Allowed merit function increase, see [1]_
    gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position

    References
    ----------
    .. [1] "Spectral residual method without gradient information for solving
           large-scale nonlinear systems of equations." W. La Cruz,
           J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).

    """
    f_k = prev_fs[-1]
    f_bar = max(prev_fs)

    alpha_p = 1
    alpha_m = 1
    alpha = 1

    while True:
        xp = x_k + alpha_p * d
        fp, Fp = f(xp)

        if fp <= f_bar + eta - gamma * alpha_p**2 * f_k:
            alpha = alpha_p
            break

        alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)

        xp = x_k - alpha_m * d
        fp, Fp = f(xp)

        if fp <= f_bar + eta - gamma * alpha_m**2 * f_k:
            alpha = -alpha_m
            break

        alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)

        alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
        alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)

    return alpha, xp, fp, Fp
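

# Illustrative sketch (example bindings, not part of the module): one step of
# the La Cruz et al. search on the residual F(x) = x with merit |F(x)|**2.
# The full step alpha_p = 1 lands on the root, so it is accepted immediately:
#
#     def merit(x):
#         F = x                      # residual
#         return np.dot(F, F), F     # (merit value, residual)
#
#     x_k = np.array([2.0])
#     d = np.array([-2.0])           # points at the root x = 0
#     alpha, xp, fp, Fp = _nonmonotone_line_search_cruz(
#         merit, x_k, d, prev_fs=[4.0], eta=0.1)
#     # expected: alpha == 1, xp == [0.], fp == 0.0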


def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
                                   gamma=1e-4, tau_min=0.1, tau_max=0.5,
                                   nu=0.85):
    """
    Nonmonotone line search from [1]_

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    f_k : float
        Initial merit function value.
    C, Q : float
        Control parameters. On the first iteration, give values
        Q=1.0, C=f_k
    eta : float
        Allowed merit function increase, see [1]_
    nu, gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position
    C : float
        New value for the control parameter C
    Q : float
        New value for the control parameter Q

    References
    ----------
    .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
           search and its application to the spectral residual
           method'', IMA J. Numer. Anal. 29, 814 (2009).

    """
    alpha_p = 1
    alpha_m = 1
    alpha = 1

    while True:
        xp = x_k + alpha_p * d
        fp, Fp = f(xp)

        if fp <= C + eta - gamma * alpha_p**2 * f_k:
            alpha = alpha_p
            break

        alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)

        xp = x_k - alpha_m * d
        fp, Fp = f(xp)

        if fp <= C + eta - gamma * alpha_m**2 * f_k:
            alpha = -alpha_m
            break

        alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)

        alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
        alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)

    # Update C and Q
    Q_next = nu * Q + 1
    C = (nu * Q * (C + eta) + fp) / Q_next
    Q = Q_next

    return alpha, xp, fp, Fp, C, Q
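

# Illustrative sketch (example bindings, not part of the module): the Cheng &
# Li variant threads the control parameters C and Q through successive calls,
# starting from Q = 1.0 and C = f_k as the docstring prescribes:
#
#     def merit(x):
#         F = x                      # residual
#         return np.dot(F, F), F     # (merit value, residual)
#
#     x_k = np.array([2.0])
#     d = np.array([-2.0])
#     alpha, xp, fp, Fp, C, Q = _nonmonotone_line_search_cheng(
#         merit, x_k, d, f_k=4.0, C=4.0, Q=1.0, eta=0.1)
#     # expected: alpha == 1, fp == 0.0; updated C and Q feed the next call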