Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/optimize/_lbfgsb_py.py: 10%
131 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
1"""
2Functions
3---------
4.. autosummary::
5 :toctree: generated/
7 fmin_l_bfgs_b
9"""
11## License for the Python wrapper
12## ==============================
14## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca>
16## Permission is hereby granted, free of charge, to any person obtaining a
17## copy of this software and associated documentation files (the "Software"),
18## to deal in the Software without restriction, including without limitation
19## the rights to use, copy, modify, merge, publish, distribute, sublicense,
20## and/or sell copies of the Software, and to permit persons to whom the
21## Software is furnished to do so, subject to the following conditions:
23## The above copyright notice and this permission notice shall be included in
24## all copies or substantial portions of the Software.
26## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
32## DEALINGS IN THE SOFTWARE.
34## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy
36import numpy as np
37from numpy import array, asarray, float64, zeros
38from . import _lbfgsb
39from ._optimize import (MemoizeJac, OptimizeResult,
40 _check_unknown_options, _prepare_scalar_function)
41from ._constraints import old_bound_to_new
43from scipy.sparse.linalg import LinearOperator
45__all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']
48def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
49 approx_grad=0,
50 bounds=None, m=10, factr=1e7, pgtol=1e-5,
51 epsilon=1e-8,
52 iprint=-1, maxfun=15000, maxiter=15000, disp=None,
53 callback=None, maxls=20):
54 """
55 Minimize a function func using the L-BFGS-B algorithm.
57 Parameters
58 ----------
59 func : callable f(x,*args)
60 Function to minimize.
61 x0 : ndarray
62 Initial guess.
63 fprime : callable fprime(x,*args), optional
64 The gradient of `func`. If None, then `func` returns the function
65 value and the gradient (``f, g = func(x, *args)``), unless
66 `approx_grad` is True in which case `func` returns only ``f``.
67 args : sequence, optional
68 Arguments to pass to `func` and `fprime`.
69 approx_grad : bool, optional
70 Whether to approximate the gradient numerically (in which case
71 `func` returns only the function value).
72 bounds : list, optional
73 ``(min, max)`` pairs for each element in ``x``, defining
74 the bounds on that parameter. Use None or +-inf for one of ``min`` or
75 ``max`` when there is no bound in that direction.
76 m : int, optional
77 The maximum number of variable metric corrections
78 used to define the limited memory matrix. (The limited memory BFGS
79 method does not store the full hessian but uses this many terms in an
80 approximation to it.)
81 factr : float, optional
82 The iteration stops when
83 ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
84 where ``eps`` is the machine precision, which is automatically
85 generated by the code. Typical values for `factr` are: 1e12 for
86 low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
87 high accuracy. See Notes for relationship to `ftol`, which is exposed
88 (instead of `factr`) by the `scipy.optimize.minimize` interface to
89 L-BFGS-B.
90 pgtol : float, optional
91 The iteration will stop when
92 ``max{|proj g_i | i = 1, ..., n} <= pgtol``
93 where ``pg_i`` is the i-th component of the projected gradient.
94 epsilon : float, optional
95 Step size used when `approx_grad` is True, for numerically
96 calculating the gradient
97 iprint : int, optional
98 Controls the frequency of output. ``iprint < 0`` means no output;
99 ``iprint = 0`` print only one line at the last iteration;
100 ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
101 ``iprint = 99`` print details of every iteration except n-vectors;
102 ``iprint = 100`` print also the changes of active set and final x;
103 ``iprint > 100`` print details of every iteration including x and g.
104 disp : int, optional
105 If zero, then no output. If a positive number, then this over-rides
106 `iprint` (i.e., `iprint` gets the value of `disp`).
107 maxfun : int, optional
108 Maximum number of function evaluations. Note that this function
109 may violate the limit because of evaluating gradients by numerical
110 differentiation.
111 maxiter : int, optional
112 Maximum number of iterations.
113 callback : callable, optional
114 Called after each iteration, as ``callback(xk)``, where ``xk`` is the
115 current parameter vector.
116 maxls : int, optional
117 Maximum number of line search steps (per iteration). Default is 20.
119 Returns
120 -------
121 x : array_like
122 Estimated position of the minimum.
123 f : float
124 Value of `func` at the minimum.
125 d : dict
126 Information dictionary.
128 * d['warnflag'] is
130 - 0 if converged,
131 - 1 if too many function evaluations or too many iterations,
132 - 2 if stopped for another reason, given in d['task']
134 * d['grad'] is the gradient at the minimum (should be 0 ish)
135 * d['funcalls'] is the number of function calls made.
136 * d['nit'] is the number of iterations.
138 See also
139 --------
140 minimize: Interface to minimization algorithms for multivariate
141 functions. See the 'L-BFGS-B' `method` in particular. Note that the
142 `ftol` option is made available via that interface, while `factr` is
143 provided via this interface, where `factr` is the factor multiplying
144 the default machine floating-point precision to arrive at `ftol`:
145 ``ftol = factr * numpy.finfo(float).eps``.
147 Notes
148 -----
149 License of L-BFGS-B (FORTRAN code):
151 The version included here (in fortran code) is 3.0
152 (released April 25, 2011). It was written by Ciyou Zhu, Richard Byrd,
153 and Jorge Nocedal <nocedal@ece.nwu.edu>. It carries the following
154 condition for use:
156 This software is freely available, but we expect that all publications
157 describing work using this software, or all commercial products using it,
158 quote at least one of the references given below. This software is released
159 under the BSD License.
161 References
162 ----------
163 * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
164 Constrained Optimization, (1995), SIAM Journal on Scientific and
165 Statistical Computing, 16, 5, pp. 1190-1208.
166 * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
167 FORTRAN routines for large scale bound constrained optimization (1997),
168 ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
169 * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
170 FORTRAN routines for large scale bound constrained optimization (2011),
171 ACM Transactions on Mathematical Software, 38, 1.
173 """
174 # handle fprime/approx_grad
175 if approx_grad:
176 fun = func
177 jac = None
178 elif fprime is None:
179 fun = MemoizeJac(func)
180 jac = fun.derivative
181 else:
182 fun = func
183 jac = fprime
185 # build options
186 opts = {'disp': disp,
187 'iprint': iprint,
188 'maxcor': m,
189 'ftol': factr * np.finfo(float).eps,
190 'gtol': pgtol,
191 'eps': epsilon,
192 'maxfun': maxfun,
193 'maxiter': maxiter,
194 'callback': callback,
195 'maxls': maxls}
197 res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
198 **opts)
199 d = {'grad': res['jac'],
200 'task': res['message'],
201 'funcalls': res['nfev'],
202 'nit': res['nit'],
203 'warnflag': res['status']}
204 f = res['fun']
205 x = res['x']
207 return x, f, d
210def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
211 disp=None, maxcor=10, ftol=2.2204460492503131e-09,
212 gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
213 iprint=-1, callback=None, maxls=20,
214 finite_diff_rel_step=None, **unknown_options):
215 """
216 Minimize a scalar function of one or more variables using the L-BFGS-B
217 algorithm.
219 Options
220 -------
221 disp : None or int
222 If `disp is None` (the default), then the supplied version of `iprint`
223 is used. If `disp is not None`, then it overrides the supplied version
224 of `iprint` with the behaviour you outlined.
225 maxcor : int
226 The maximum number of variable metric corrections used to
227 define the limited memory matrix. (The limited memory BFGS
228 method does not store the full hessian but uses this many terms
229 in an approximation to it.)
230 ftol : float
231 The iteration stops when ``(f^k -
232 f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
233 gtol : float
234 The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
235 <= gtol`` where ``pg_i`` is the i-th component of the
236 projected gradient.
237 eps : float or ndarray
238 If `jac is None` the absolute step size used for numerical
239 approximation of the jacobian via forward differences.
240 maxfun : int
241 Maximum number of function evaluations. Note that this function
242 may violate the limit because of evaluating gradients by numerical
243 differentiation.
244 maxiter : int
245 Maximum number of iterations.
246 iprint : int, optional
247 Controls the frequency of output. ``iprint < 0`` means no output;
248 ``iprint = 0`` print only one line at the last iteration;
249 ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
250 ``iprint = 99`` print details of every iteration except n-vectors;
251 ``iprint = 100`` print also the changes of active set and final x;
252 ``iprint > 100`` print details of every iteration including x and g.
253 maxls : int, optional
254 Maximum number of line search steps (per iteration). Default is 20.
255 finite_diff_rel_step : None or array_like, optional
256 If `jac in ['2-point', '3-point', 'cs']` the relative step size to
257 use for numerical approximation of the jacobian. The absolute step
258 size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
259 possibly adjusted to fit into the bounds. For ``method='3-point'``
260 the sign of `h` is ignored. If None (default) then step is selected
261 automatically.
263 Notes
264 -----
265 The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
266 but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
267 relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
268 I.e., `factr` multiplies the default machine floating-point precision to
269 arrive at `ftol`.
271 """
272 _check_unknown_options(unknown_options)
273 m = maxcor
274 pgtol = gtol
275 factr = ftol / np.finfo(float).eps
277 x0 = asarray(x0).ravel()
278 n, = x0.shape
280 if bounds is None:
281 bounds = [(None, None)] * n
282 if len(bounds) != n:
283 raise ValueError('length of x0 != length of bounds')
285 # unbounded variables must use None, not +-inf, for optimizer to work properly
286 bounds = [(None if l == -np.inf else l, None if u == np.inf else u) for l, u in bounds]
287 # LBFGSB is sent 'old-style' bounds, 'new-style' bounds are required by
288 # approx_derivative and ScalarFunction
289 new_bounds = old_bound_to_new(bounds)
291 # check bounds
292 if (new_bounds[0] > new_bounds[1]).any():
293 raise ValueError("LBFGSB - one of the lower bounds is greater than an upper bound.")
295 # initial vector must lie within the bounds. Otherwise ScalarFunction and
296 # approx_derivative will cause problems
297 x0 = np.clip(x0, new_bounds[0], new_bounds[1])
299 if disp is not None:
300 if disp == 0:
301 iprint = -1
302 else:
303 iprint = disp
305 sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
306 bounds=new_bounds,
307 finite_diff_rel_step=finite_diff_rel_step)
309 func_and_grad = sf.fun_and_grad
311 fortran_int = _lbfgsb.types.intvar.dtype
313 nbd = zeros(n, fortran_int)
314 low_bnd = zeros(n, float64)
315 upper_bnd = zeros(n, float64)
316 bounds_map = {(None, None): 0,
317 (1, None): 1,
318 (1, 1): 2,
319 (None, 1): 3}
320 for i in range(0, n):
321 l, u = bounds[i]
322 if l is not None:
323 low_bnd[i] = l
324 l = 1
325 if u is not None:
326 upper_bnd[i] = u
327 u = 1
328 nbd[i] = bounds_map[l, u]
330 if not maxls > 0:
331 raise ValueError('maxls must be positive.')
333 x = array(x0, float64)
334 f = array(0.0, float64)
335 g = zeros((n,), float64)
336 wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
337 iwa = zeros(3*n, fortran_int)
338 task = zeros(1, 'S60')
339 csave = zeros(1, 'S60')
340 lsave = zeros(4, fortran_int)
341 isave = zeros(44, fortran_int)
342 dsave = zeros(29, float64)
344 task[:] = 'START'
346 n_iterations = 0
348 while 1:
349 # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
350 _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
351 pgtol, wa, iwa, task, iprint, csave, lsave,
352 isave, dsave, maxls)
353 task_str = task.tobytes()
354 if task_str.startswith(b'FG'):
355 # The minimization routine wants f and g at the current x.
356 # Note that interruptions due to maxfun are postponed
357 # until the completion of the current minimization iteration.
358 # Overwrite f and g:
359 f, g = func_and_grad(x)
360 elif task_str.startswith(b'NEW_X'):
361 # new iteration
362 n_iterations += 1
363 if callback is not None:
364 callback(np.copy(x))
366 if n_iterations >= maxiter:
367 task[:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
368 elif sf.nfev > maxfun:
369 task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
370 'EXCEEDS LIMIT')
371 else:
372 break
374 task_str = task.tobytes().strip(b'\x00').strip()
375 if task_str.startswith(b'CONV'):
376 warnflag = 0
377 elif sf.nfev > maxfun or n_iterations >= maxiter:
378 warnflag = 1
379 else:
380 warnflag = 2
382 # These two portions of the workspace are described in the mainlb
383 # subroutine in lbfgsb.f. See line 363.
384 s = wa[0: m*n].reshape(m, n)
385 y = wa[m*n: 2*m*n].reshape(m, n)
387 # See lbfgsb.f line 160 for this portion of the workspace.
388 # isave(31) = the total number of BFGS updates prior the current iteration;
389 n_bfgs_updates = isave[30]
391 n_corrs = min(n_bfgs_updates, maxcor)
392 hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])
394 task_str = task_str.decode()
395 return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
396 njev=sf.ngev,
397 nit=n_iterations, status=warnflag, message=task_str,
398 x=x, success=(warnflag == 0), hess_inv=hess_inv)
401class LbfgsInvHessProduct(LinearOperator):
402 """Linear operator for the L-BFGS approximate inverse Hessian.
404 This operator computes the product of a vector with the approximate inverse
405 of the Hessian of the objective function, using the L-BFGS limited
406 memory approximation to the inverse Hessian, accumulated during the
407 optimization.
409 Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
410 interface.
412 Parameters
413 ----------
414 sk : array_like, shape=(n_corr, n)
415 Array of `n_corr` most recent updates to the solution vector.
416 (See [1]).
417 yk : array_like, shape=(n_corr, n)
418 Array of `n_corr` most recent updates to the gradient. (See [1]).
420 References
421 ----------
422 .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
423 storage." Mathematics of computation 35.151 (1980): 773-782.
425 """
427 def __init__(self, sk, yk):
428 """Construct the operator."""
429 if sk.shape != yk.shape or sk.ndim != 2:
430 raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
431 n_corrs, n = sk.shape
433 super().__init__(dtype=np.float64, shape=(n, n))
435 self.sk = sk
436 self.yk = yk
437 self.n_corrs = n_corrs
438 self.rho = 1 / np.einsum('ij,ij->i', sk, yk)
440 def _matvec(self, x):
441 """Efficient matrix-vector multiply with the BFGS matrices.
443 This calculation is described in Section (4) of [1].
445 Parameters
446 ----------
447 x : ndarray
448 An array with shape (n,) or (n,1).
450 Returns
451 -------
452 y : ndarray
453 The matrix-vector product
455 """
456 s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
457 q = np.array(x, dtype=self.dtype, copy=True)
458 if q.ndim == 2 and q.shape[1] == 1:
459 q = q.reshape(-1)
461 alpha = np.empty(n_corrs)
463 for i in range(n_corrs-1, -1, -1):
464 alpha[i] = rho[i] * np.dot(s[i], q)
465 q = q - alpha[i]*y[i]
467 r = q
468 for i in range(n_corrs):
469 beta = rho[i] * np.dot(y[i], r)
470 r = r + s[i] * (alpha[i] - beta)
472 return r
474 def todense(self):
475 """Return a dense array representation of this operator.
477 Returns
478 -------
479 arr : ndarray, shape=(n, n)
480 An array with the same shape and containing
481 the same data represented by this `LinearOperator`.
483 """
484 s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
485 I = np.eye(*self.shape, dtype=self.dtype)
486 Hk = I
488 for i in range(n_corrs):
489 A1 = I - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
490 A2 = I - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]
492 Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *
493 s[i][np.newaxis, :])
494 return Hk