1""" 

2Functions 

3--------- 

4.. autosummary:: 

5 :toctree: generated/ 

6 

7 fmin_l_bfgs_b 

8 

9""" 

10 

11## License for the Python wrapper 

12## ============================== 

13 

14## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca> 

15 

16## Permission is hereby granted, free of charge, to any person obtaining a 

17## copy of this software and associated documentation files (the "Software"), 

18## to deal in the Software without restriction, including without limitation 

19## the rights to use, copy, modify, merge, publish, distribute, sublicense, 

20## and/or sell copies of the Software, and to permit persons to whom the 

21## Software is furnished to do so, subject to the following conditions: 

22 

23## The above copyright notice and this permission notice shall be included in 

24## all copies or substantial portions of the Software. 

25 

26## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 

27## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 

28## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 

29## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 

30## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 

31## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 

32## DEALINGS IN THE SOFTWARE. 

33 

34## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy 

35 

36import numpy as np 

37from numpy import array, asarray, float64, zeros 

38from . import _lbfgsb 

39from ._optimize import (MemoizeJac, OptimizeResult, 

40 _check_unknown_options, _prepare_scalar_function) 

41from ._constraints import old_bound_to_new 

42 

43from scipy.sparse.linalg import LinearOperator 

44 

45__all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct'] 

46 

47 

48def fmin_l_bfgs_b(func, x0, fprime=None, args=(), 

49 approx_grad=0, 

50 bounds=None, m=10, factr=1e7, pgtol=1e-5, 

51 epsilon=1e-8, 

52 iprint=-1, maxfun=15000, maxiter=15000, disp=None, 

53 callback=None, maxls=20): 

54 """ 

55 Minimize a function func using the L-BFGS-B algorithm. 

    Parameters
    ----------
    func : callable f(x,*args)
        Function to minimize.
    x0 : ndarray
        Initial guess.
    fprime : callable fprime(x,*args), optional
        The gradient of `func`. If None, then `func` returns the function
        value and the gradient (``f, g = func(x, *args)``), unless
        `approx_grad` is True in which case `func` returns only ``f``.
    args : sequence, optional
        Arguments to pass to `func` and `fprime`.
    approx_grad : bool, optional
        Whether to approximate the gradient numerically (in which case
        `func` returns only the function value).
    bounds : list, optional
        ``(min, max)`` pairs for each element in ``x``, defining
        the bounds on that parameter. Use None or +-inf for one of ``min`` or
        ``max`` when there is no bound in that direction.
    m : int, optional
        The maximum number of variable metric corrections
        used to define the limited memory matrix. (The limited memory BFGS
        method does not store the full Hessian but uses this many terms in an
        approximation to it.)
    factr : float, optional
        The iteration stops when
        ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
        where ``eps`` is the machine precision, which is automatically
        generated by the code. Typical values for `factr` are: 1e12 for
        low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
        high accuracy. See Notes for relationship to `ftol`, which is exposed
        (instead of `factr`) by the `scipy.optimize.minimize` interface to
        L-BFGS-B.
    pgtol : float, optional
        The iteration will stop when
        ``max{|proj g_i | i = 1, ..., n} <= pgtol``
        where ``proj g_i`` is the i-th component of the projected gradient.
    epsilon : float, optional
        Step size used when `approx_grad` is True, for numerically
        calculating the gradient.
    iprint : int, optional
        Controls the frequency of output. ``iprint < 0`` means no output;
        ``iprint = 0`` print only one line at the last iteration;
        ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
        ``iprint = 99`` print details of every iteration except n-vectors;
        ``iprint = 100`` print also the changes of active set and final x;
        ``iprint > 100`` print details of every iteration including x and g.
    disp : int, optional
        If zero, then no output. If a positive number, then this overrides
        `iprint` (i.e., `iprint` gets the value of `disp`).
    maxfun : int, optional
        Maximum number of function evaluations. Note that this function
        may violate the limit because of evaluating gradients by numerical
        differentiation.
    maxiter : int, optional
        Maximum number of iterations.
    callback : callable, optional
        Called after each iteration, as ``callback(xk)``, where ``xk`` is the
        current parameter vector.
    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.

    Returns
    -------
    x : array_like
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    d : dict
        Information dictionary.

        * d['warnflag'] is

          - 0 if converged,
          - 1 if too many function evaluations or too many iterations,
          - 2 if stopped for another reason, given in d['task']

        * d['grad'] is the gradient at the minimum (it should be near 0)
        * d['funcalls'] is the number of function calls made.
        * d['nit'] is the number of iterations.

    See also
    --------
    minimize: Interface to minimization algorithms for multivariate
        functions. See the 'L-BFGS-B' `method` in particular. Note that the
        `ftol` option is made available via that interface, while `factr` is
        provided via this interface, where `factr` is the factor multiplying
        the default machine floating-point precision to arrive at `ftol`:
        ``ftol = factr * numpy.finfo(float).eps``.

    Notes
    -----
    License of L-BFGS-B (Fortran code):

    The version included here (in Fortran code) is 3.0
    (released April 25, 2011). It was written by Ciyou Zhu, Richard Byrd,
    and Jorge Nocedal <nocedal@ece.nwu.edu>. It carries the following
    condition for use:

    This software is freely available, but we expect that all publications
    describing work using this software, or all commercial products using it,
    quote at least one of the references given below. This software is
    released under the BSD License.

    References
    ----------
    * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
      Constrained Optimization, (1995), SIAM Journal on Scientific and
      Statistical Computing, 16, 5, pp. 1190-1208.
    * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
      FORTRAN routines for large scale bound constrained optimization (1997),
      ACM Transactions on Mathematical Software, 23, 4, pp. 550-560.
    * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
      FORTRAN routines for large scale bound constrained optimization (2011),
      ACM Transactions on Mathematical Software, 38, 1.
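
    Examples
    --------
    A small example: minimize a separable quadratic subject to a lower
    bound on the first variable (``approx_grad=True``, so only ``f`` is
    supplied and the gradient is estimated numerically).

    >>> import numpy as np
    >>> from scipy.optimize import fmin_l_bfgs_b
    >>> def f(x):
    ...     return (x[0] - 1) ** 2 + (x[1] - 2.5) ** 2
    >>> x, fval, info = fmin_l_bfgs_b(f, x0=np.zeros(2), approx_grad=True,
    ...                               bounds=[(2, None), (None, None)])
    >>> bool(np.allclose(x, [2.0, 2.5], atol=1e-4))
    True
    >>> info['warnflag']  # 0 means convergence
    0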

    """

    # handle fprime/approx_grad
    if approx_grad:
        fun = func
        jac = None
    elif fprime is None:
        fun = MemoizeJac(func)
        jac = fun.derivative
    else:
        fun = func
        jac = fprime

    # build options
    opts = {'disp': disp,
            'iprint': iprint,
            'maxcor': m,
            'ftol': factr * np.finfo(float).eps,
            'gtol': pgtol,
            'eps': epsilon,
            'maxfun': maxfun,
            'maxiter': maxiter,
            'callback': callback,
            'maxls': maxls}

    res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
                           **opts)
    d = {'grad': res['jac'],
         'task': res['message'],
         'funcalls': res['nfev'],
         'nit': res['nit'],
         'warnflag': res['status']}
    f = res['fun']
    x = res['x']

    return x, f, d


def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
                     disp=None, maxcor=10, ftol=2.2204460492503131e-09,
                     gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
                     iprint=-1, callback=None, maxls=20,
                     finite_diff_rel_step=None, **unknown_options):
    """
    Minimize a scalar function of one or more variables using the L-BFGS-B
    algorithm.

    Options
    -------
    disp : None or int
        If `disp is None` (the default), then the supplied version of
        `iprint` is used. If `disp is not None`, then it overrides the
        supplied version of `iprint` (``disp == 0`` suppresses output;
        any other value is assigned to `iprint`).

    maxcor : int
        The maximum number of variable metric corrections used to
        define the limited memory matrix. (The limited memory BFGS
        method does not store the full Hessian but uses this many terms
        in an approximation to it.)
    ftol : float
        The iteration stops when ``(f^k -
        f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
    gtol : float
        The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
        <= gtol`` where ``proj g_i`` is the i-th component of the
        projected gradient.
    eps : float or ndarray
        If `jac is None`, the absolute step size used for numerical
        approximation of the Jacobian via forward differences.
    maxfun : int
        Maximum number of function evaluations. Note that this function
        may violate the limit because of evaluating gradients by numerical
        differentiation.
    maxiter : int
        Maximum number of iterations.
    iprint : int, optional
        Controls the frequency of output. ``iprint < 0`` means no output;
        ``iprint = 0`` print only one line at the last iteration;
        ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
        ``iprint = 99`` print details of every iteration except n-vectors;
        ``iprint = 100`` print also the changes of active set and final x;
        ``iprint > 100`` print details of every iteration including x and g.
    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.
    finite_diff_rel_step : None or array_like, optional
        If `jac in ['2-point', '3-point', 'cs']` the relative step size to
        use for numerical approximation of the Jacobian. The absolute step
        size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
        possibly adjusted to fit into the bounds. For ``method='3-point'``
        the sign of `h` is ignored. If None (default) then step is selected
        automatically.

    Notes
    -----
    The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
    but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
    relationship between the two is ``ftol = factr * numpy.finfo(float).eps``,
    i.e., `factr` multiplies the default machine floating-point precision to
    arrive at `ftol`.

    """

    _check_unknown_options(unknown_options)
    m = maxcor
    pgtol = gtol
    factr = ftol / np.finfo(float).eps
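    # e.g., the default ftol of 2.2204460492503131e-09 maps back to the
    # fmin_l_bfgs_b default of factr = 1e7, since eps is about 2.22e-16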

    x0 = asarray(x0).ravel()
    n, = x0.shape

    if bounds is None:
        bounds = [(None, None)] * n
    if len(bounds) != n:
        raise ValueError('length of x0 != length of bounds')

    # unbounded variables must use None, not +-inf, for optimizer to work
    # properly
    bounds = [(None if l == -np.inf else l,
               None if u == np.inf else u) for l, u in bounds]

    # LBFGSB is sent 'old-style' bounds, 'new-style' bounds are required by
    # approx_derivative and ScalarFunction
    new_bounds = old_bound_to_new(bounds)

    # check bounds
    if (new_bounds[0] > new_bounds[1]).any():
        raise ValueError("LBFGSB - one of the lower bounds is greater than "
                         "an upper bound.")

    # initial vector must lie within the bounds. Otherwise ScalarFunction and
    # approx_derivative will cause problems
    x0 = np.clip(x0, new_bounds[0], new_bounds[1])

    if disp is not None:
        if disp == 0:
            iprint = -1
        else:
            iprint = disp

    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
                                  bounds=new_bounds,
                                  finite_diff_rel_step=finite_diff_rel_step)

    func_and_grad = sf.fun_and_grad

    fortran_int = _lbfgsb.types.intvar.dtype

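    # nbd encodes each variable's bound type for the Fortran routine:
    # 0 - unbounded, 1 - lower bound only, 2 - both bounds, 3 - upper
    # bound only (bounds_map below produces these codes)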
    nbd = zeros(n, fortran_int)
    low_bnd = zeros(n, float64)
    upper_bnd = zeros(n, float64)
    bounds_map = {(None, None): 0,
                  (1, None): 1,
                  (1, 1): 2,
                  (None, 1): 3}
    for i in range(0, n):
        l, u = bounds[i]
        if l is not None:
            low_bnd[i] = l
            l = 1
        if u is not None:
            upper_bnd[i] = u
            u = 1
        nbd[i] = bounds_map[l, u]

    if not maxls > 0:
        raise ValueError('maxls must be positive.')

    x = array(x0, float64)
    f = array(0.0, float64)
    g = zeros((n,), float64)

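    # Fortran workspace: wa is sized as the setulb routine of L-BFGS-B 3.0
    # requires; task, csave, lsave, isave and dsave carry the solver state
    # between calls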
    wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
    iwa = zeros(3*n, fortran_int)
    task = zeros(1, 'S60')
    csave = zeros(1, 'S60')
    lsave = zeros(4, fortran_int)
    isave = zeros(44, fortran_int)
    dsave = zeros(29, float64)

    task[:] = 'START'

    n_iterations = 0

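    # setulb is a reverse-communication routine: each call advances the
    # Fortran solver and sets ``task`` to tell this loop what to do next
    # ('FG': evaluate f and g at x; 'NEW_X': an iteration has completed;
    # anything else: stop)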
    while 1:
        # setulb updates x, f, g, wa, iwa, task, csave, lsave, isave and
        # dsave in place
        _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
                       pgtol, wa, iwa, task, iprint, csave, lsave,
                       isave, dsave, maxls)
        task_str = task.tobytes()
        if task_str.startswith(b'FG'):
            # The minimization routine wants f and g at the current x.
            # Note that interruptions due to maxfun are postponed
            # until the completion of the current minimization iteration.
            # Overwrite f and g:
            f, g = func_and_grad(x)
        elif task_str.startswith(b'NEW_X'):
            # new iteration
            n_iterations += 1
            if callback is not None:
                callback(np.copy(x))

            if n_iterations >= maxiter:
                task[:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
            elif sf.nfev > maxfun:
                task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
                           'EXCEEDS LIMIT')
        else:
            break

    task_str = task.tobytes().strip(b'\x00').strip()
    if task_str.startswith(b'CONV'):
        warnflag = 0
    elif sf.nfev > maxfun or n_iterations >= maxiter:
        warnflag = 1
    else:
        warnflag = 2

    # These two portions of the workspace are described in the mainlb
    # subroutine in lbfgsb.f. See line 363.
    s = wa[0: m*n].reshape(m, n)
    y = wa[m*n: 2*m*n].reshape(m, n)

    # See lbfgsb.f line 160 for this portion of the workspace.
    # isave(31) = the total number of BFGS updates prior to the current
    # iteration
    n_bfgs_updates = isave[30]

    n_corrs = min(n_bfgs_updates, maxcor)
    hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])

    task_str = task_str.decode()
    return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
                          njev=sf.ngev,
                          nit=n_iterations, status=warnflag, message=task_str,
                          x=x, success=(warnflag == 0), hess_inv=hess_inv)


class LbfgsInvHessProduct(LinearOperator):
    """Linear operator for the L-BFGS approximate inverse Hessian.

    This operator computes the product of a vector with the approximate
    inverse of the Hessian of the objective function, using the L-BFGS
    limited memory approximation to the inverse Hessian, accumulated during
    the optimization.

    Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
    interface.

    Parameters
    ----------
    sk : array_like, shape=(n_corrs, n)
        Array of `n_corrs` most recent updates to the solution vector.
        (See [1]).
    yk : array_like, shape=(n_corrs, n)
        Array of `n_corrs` most recent updates to the gradient. (See [1]).

    References
    ----------
    .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
       storage." Mathematics of computation 35.151 (1980): 773-782.
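
    Examples
    --------
    A small usage sketch: the ``hess_inv`` attribute returned by an
    L-BFGS-B minimization is an instance of this class, and it can be
    applied to a vector like any other ``LinearOperator``.

    >>> import numpy as np
    >>> from scipy.optimize import minimize
    >>> res = minimize(lambda x: np.sum((x - 1) ** 2), np.zeros(3),
    ...                method='L-BFGS-B')
    >>> v = res.hess_inv.dot(np.ones(3))  # matrix-vector product
    >>> v.shape
    (3,)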

    """

    def __init__(self, sk, yk):
        """Construct the operator."""
        if sk.shape != yk.shape or sk.ndim != 2:
            raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
        n_corrs, n = sk.shape

        super().__init__(dtype=np.float64, shape=(n, n))

        self.sk = sk
        self.yk = yk
        self.n_corrs = n_corrs
        self.rho = 1 / np.einsum('ij,ij->i', sk, yk)

    def _matvec(self, x):
        """Efficient matrix-vector multiply with the BFGS matrices.

        This calculation is described in Section (4) of [1].

        Parameters
        ----------
        x : ndarray
            An array with shape (n,) or (n,1).

        Returns
        -------
        y : ndarray
            The matrix-vector product.

        """

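        # Standard L-BFGS two-loop recursion with the identity as the
        # initial inverse-Hessian approximation: the backward pass computes
        # the alpha coefficients, the forward pass assembles r = H @ x.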
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        q = np.array(x, dtype=self.dtype, copy=True)
        if q.ndim == 2 and q.shape[1] == 1:
            q = q.reshape(-1)

        alpha = np.empty(n_corrs)

        # backward pass, newest correction first
        for i in range(n_corrs-1, -1, -1):
            alpha[i] = rho[i] * np.dot(s[i], q)
            q = q - alpha[i]*y[i]

        # forward pass, oldest correction first
        r = q
        for i in range(n_corrs):
            beta = rho[i] * np.dot(y[i], r)
            r = r + s[i] * (alpha[i] - beta)

        return r

    def todense(self):
        """Return a dense array representation of this operator.

        Returns
        -------
        arr : ndarray, shape=(n, n)
            An array with the same shape and containing
            the same data represented by this `LinearOperator`.

        """

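        # Dense equivalent of _matvec: apply the recursive inverse-BFGS
        # update, starting from H_0 = I,
        #   H_{i+1} = (I - rho_i s_i y_i^T) H_i (I - rho_i y_i s_i^T)
        #             + rho_i s_i s_i^T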
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        I = np.eye(*self.shape, dtype=self.dtype)
        Hk = I

        for i in range(n_corrs):
            A1 = I - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
            A2 = I - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]

            Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *
                                               s[i][np.newaxis, :])
        return Hk