1""" 

2Functions 

3--------- 

4.. autosummary:: 

5 :toctree: generated/ 

6 

7 fmin_l_bfgs_b 

8 

9""" 

10 

11## License for the Python wrapper 

12## ============================== 

13 

14## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca> 

15 

16## Permission is hereby granted, free of charge, to any person obtaining a 

17## copy of this software and associated documentation files (the "Software"), 

18## to deal in the Software without restriction, including without limitation 

19## the rights to use, copy, modify, merge, publish, distribute, sublicense, 

20## and/or sell copies of the Software, and to permit persons to whom the 

21## Software is furnished to do so, subject to the following conditions: 

22 

23## The above copyright notice and this permission notice shall be included in 

24## all copies or substantial portions of the Software. 

25 

26## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 

27## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 

28## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 

29## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 

30## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 

31## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 

32## DEALINGS IN THE SOFTWARE. 

33 

34## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy 

35 

36import numpy as np 

37from numpy import array, asarray, float64, zeros 

38from . import _lbfgsb 

39from ._optimize import (MemoizeJac, OptimizeResult, 

40 _check_unknown_options, _prepare_scalar_function) 

41from ._constraints import old_bound_to_new 

42 

43from scipy.sparse.linalg import LinearOperator 

44 

45__all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct'] 

46 

47 

48def fmin_l_bfgs_b(func, x0, fprime=None, args=(), 

49 approx_grad=0, 

50 bounds=None, m=10, factr=1e7, pgtol=1e-5, 

51 epsilon=1e-8, 

52 iprint=-1, maxfun=15000, maxiter=15000, disp=None, 

53 callback=None, maxls=20): 

54 """ 

55 Minimize a function func using the L-BFGS-B algorithm. 

    Parameters
    ----------
    func : callable f(x,*args)
        Function to minimize.
    x0 : ndarray
        Initial guess.
    fprime : callable fprime(x,*args), optional
        The gradient of `func`. If None, then `func` returns the function
        value and the gradient (``f, g = func(x, *args)``), unless
        `approx_grad` is True in which case `func` returns only ``f``.
    args : sequence, optional
        Arguments to pass to `func` and `fprime`.
    approx_grad : bool, optional
        Whether to approximate the gradient numerically (in which case
        `func` returns only the function value).
    bounds : list, optional
        ``(min, max)`` pairs for each element in ``x``, defining
        the bounds on that parameter. Use None or +-inf for one of ``min`` or
        ``max`` when there is no bound in that direction.
    m : int, optional
        The maximum number of variable metric corrections
        used to define the limited memory matrix. (The limited memory BFGS
        method does not store the full Hessian but uses this many terms in an
        approximation to it.)
    factr : float, optional
        The iteration stops when
        ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
        where ``eps`` is the machine precision, which is automatically
        generated by the code. Typical values for `factr` are: 1e12 for
        low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
        high accuracy. See Notes for relationship to `ftol`, which is exposed
        (instead of `factr`) by the `scipy.optimize.minimize` interface to
        L-BFGS-B.
    pgtol : float, optional
        The iteration will stop when
        ``max{|proj g_i | i = 1, ..., n} <= pgtol``
        where ``proj g_i`` is the i-th component of the projected gradient.
    epsilon : float, optional
        Step size used when `approx_grad` is True, for numerically
        calculating the gradient.
    iprint : int, optional
        Controls the frequency of output. ``iprint < 0`` means no output;
        ``iprint = 0`` print only one line at the last iteration;
        ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
        ``iprint = 99`` print details of every iteration except n-vectors;
        ``iprint = 100`` print also the changes of active set and final x;
        ``iprint > 100`` print details of every iteration including x and g.
    disp : int, optional
        If zero, then no output. If a positive number, then this overrides
        `iprint` (i.e., `iprint` gets the value of `disp`).
    maxfun : int, optional
        Maximum number of function evaluations. Note that this function
        may violate the limit because of evaluating gradients by numerical
        differentiation.
    maxiter : int, optional
        Maximum number of iterations.
    callback : callable, optional
        Called after each iteration, as ``callback(xk)``, where ``xk`` is the
        current parameter vector.
    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.

    Returns
    -------
    x : array_like
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    d : dict
        Information dictionary.

        * d['warnflag'] is

          - 0 if converged,
          - 1 if too many function evaluations or too many iterations,
          - 2 if stopped for another reason, given in d['task']

        * d['grad'] is the gradient at the minimum (it should be near 0)
        * d['funcalls'] is the number of function calls made.
        * d['nit'] is the number of iterations.

    See also
    --------
    minimize: Interface to minimization algorithms for multivariate
        functions. See the 'L-BFGS-B' `method` in particular. Note that the
        `ftol` option is made available via that interface, while `factr` is
        provided via this interface, where `factr` is the factor multiplying
        the default machine floating-point precision to arrive at `ftol`:
        ``ftol = factr * numpy.finfo(float).eps``.

    Notes
    -----
    License of L-BFGS-B (Fortran code):

    The version included here (in Fortran code) is 3.0
    (released April 25, 2011). It was written by Ciyou Zhu, Richard Byrd,
    and Jorge Nocedal <nocedal@ece.nwu.edu>. It carries the following
    condition for use:

    This software is freely available, but we expect that all publications
    describing work using this software, or all commercial products using it,
    quote at least one of the references given below. This software is
    released under the BSD License.

    References
    ----------
    * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
      Constrained Optimization, (1995), SIAM Journal on Scientific and
      Statistical Computing, 16, 5, pp. 1190-1208.
    * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
      FORTRAN routines for large scale bound constrained optimization (1997),
      ACM Transactions on Mathematical Software, 23, 4, pp. 550-560.
    * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
      FORTRAN routines for large scale bound constrained optimization (2011),
      ACM Transactions on Mathematical Software, 38, 1.
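
    Examples
    --------
    A small example: minimize a separable quadratic subject to a lower
    bound on the first variable (``approx_grad=True``, so only ``f`` is
    supplied and the gradient is estimated numerically).

    >>> import numpy as np
    >>> from scipy.optimize import fmin_l_bfgs_b
    >>> def f(x):
    ...     return (x[0] - 1) ** 2 + (x[1] - 2.5) ** 2
    >>> x, fval, info = fmin_l_bfgs_b(f, x0=np.zeros(2), approx_grad=True,
    ...                               bounds=[(2, None), (None, None)])
    >>> bool(np.allclose(x, [2.0, 2.5], atol=1e-4))
    True
    >>> info['warnflag']  # 0 means convergence
    0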

    """

    # handle fprime/approx_grad
    if approx_grad:
        fun = func
        jac = None
    elif fprime is None:
        fun = MemoizeJac(func)
        jac = fun.derivative
    else:
        fun = func
        jac = fprime

    # build options
    opts = {'disp': disp,
            'iprint': iprint,
            'maxcor': m,
            'ftol': factr * np.finfo(float).eps,
            'gtol': pgtol,
            'eps': epsilon,
            'maxfun': maxfun,
            'maxiter': maxiter,
            'callback': callback,
            'maxls': maxls}

    res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
                           **opts)
    d = {'grad': res['jac'],
         'task': res['message'],
         'funcalls': res['nfev'],
         'nit': res['nit'],
         'warnflag': res['status']}
    f = res['fun']
    x = res['x']

    return x, f, d


def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
                     disp=None, maxcor=10, ftol=2.2204460492503131e-09,
                     gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
                     iprint=-1, callback=None, maxls=20,
                     finite_diff_rel_step=None, **unknown_options):
    """
    Minimize a scalar function of one or more variables using the L-BFGS-B
    algorithm.

    Options
    -------
    disp : None or int
        If `disp is None` (the default), then the supplied version of
        `iprint` is used. If `disp is not None`, then it overrides the
        supplied version of `iprint` (``disp == 0`` suppresses output;
        any other value is assigned to `iprint`).

    maxcor : int
        The maximum number of variable metric corrections used to
        define the limited memory matrix. (The limited memory BFGS
        method does not store the full Hessian but uses this many terms
        in an approximation to it.)
    ftol : float
        The iteration stops when ``(f^k -
        f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
    gtol : float
        The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
        <= gtol`` where ``proj g_i`` is the i-th component of the
        projected gradient.
    eps : float or ndarray
        If `jac is None`, the absolute step size used for numerical
        approximation of the Jacobian via forward differences.
    maxfun : int
        Maximum number of function evaluations. Note that this function
        may violate the limit because of evaluating gradients by numerical
        differentiation.
    maxiter : int
        Maximum number of iterations.
    iprint : int, optional
        Controls the frequency of output. ``iprint < 0`` means no output;
        ``iprint = 0`` print only one line at the last iteration;
        ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
        ``iprint = 99`` print details of every iteration except n-vectors;
        ``iprint = 100`` print also the changes of active set and final x;
        ``iprint > 100`` print details of every iteration including x and g.
    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.
    finite_diff_rel_step : None or array_like, optional
        If `jac in ['2-point', '3-point', 'cs']` the relative step size to
        use for numerical approximation of the Jacobian. The absolute step
        size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
        possibly adjusted to fit into the bounds. For ``method='3-point'``
        the sign of `h` is ignored. If None (default) then step is selected
        automatically.

    Notes
    -----
    The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
    but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
    relationship between the two is ``ftol = factr * numpy.finfo(float).eps``,
    i.e., `factr` multiplies the default machine floating-point precision to
    arrive at `ftol`.

    """

    _check_unknown_options(unknown_options)
    m = maxcor
    pgtol = gtol
    factr = ftol / np.finfo(float).eps
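    # e.g., the default ftol of 2.2204460492503131e-09 maps back to the
    # fmin_l_bfgs_b default of factr = 1e7, since eps is about 2.22e-16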

    x0 = asarray(x0).ravel()
    n, = x0.shape

    if bounds is None:
        bounds = [(None, None)] * n
    if len(bounds) != n:
        raise ValueError('length of x0 != length of bounds')

    # unbounded variables must use None, not +-inf, for optimizer to work
    # properly
    bounds = [(None if l == -np.inf else l,
               None if u == np.inf else u) for l, u in bounds]

    # LBFGSB is sent 'old-style' bounds, 'new-style' bounds are required by
    # approx_derivative and ScalarFunction
    new_bounds = old_bound_to_new(bounds)

    # check bounds
    if (new_bounds[0] > new_bounds[1]).any():
        raise ValueError("LBFGSB - one of the lower bounds is greater than "
                         "an upper bound.")

    # initial vector must lie within the bounds. Otherwise ScalarFunction and
    # approx_derivative will cause problems
    x0 = np.clip(x0, new_bounds[0], new_bounds[1])

    if disp is not None:
        if disp == 0:
            iprint = -1
        else:
            iprint = disp

    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
                                  bounds=new_bounds,
                                  finite_diff_rel_step=finite_diff_rel_step)

    func_and_grad = sf.fun_and_grad

    fortran_int = _lbfgsb.types.intvar.dtype

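    # nbd encodes each variable's bound type for the Fortran routine:
    # 0 - unbounded, 1 - lower bound only, 2 - both bounds, 3 - upper
    # bound only (bounds_map below produces these codes)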
    nbd = zeros(n, fortran_int)
    low_bnd = zeros(n, float64)
    upper_bnd = zeros(n, float64)
    bounds_map = {(None, None): 0,
                  (1, None): 1,
                  (1, 1): 2,
                  (None, 1): 3}
    for i in range(0, n):
        l, u = bounds[i]
        if l is not None:
            low_bnd[i] = l
            l = 1
        if u is not None:
            upper_bnd[i] = u
            u = 1
        nbd[i] = bounds_map[l, u]

    if not maxls > 0:
        raise ValueError('maxls must be positive.')

    x = array(x0, float64)
    f = array(0.0, float64)
    g = zeros((n,), float64)

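    # Fortran workspace: wa is sized as the setulb routine of L-BFGS-B 3.0
    # requires; task, csave, lsave, isave and dsave carry the solver state
    # between calls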
    wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
    iwa = zeros(3*n, fortran_int)
    task = zeros(1, 'S60')
    csave = zeros(1, 'S60')
    lsave = zeros(4, fortran_int)
    isave = zeros(44, fortran_int)
    dsave = zeros(29, float64)

    task[:] = 'START'

    n_iterations = 0

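    # setulb is a reverse-communication routine: each call advances the
    # Fortran solver and sets ``task`` to tell this loop what to do next
    # ('FG': evaluate f and g at x; 'NEW_X': an iteration has completed;
    # anything else: stop)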
    while 1:
        # setulb updates x, f, g, wa, iwa, task, csave, lsave, isave and
        # dsave in place
        _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
                       pgtol, wa, iwa, task, iprint, csave, lsave,
                       isave, dsave, maxls)
        task_str = task.tobytes()
        if task_str.startswith(b'FG'):
            # The minimization routine wants f and g at the current x.
            # Note that interruptions due to maxfun are postponed
            # until the completion of the current minimization iteration.
            # Overwrite f and g:
            f, g = func_and_grad(x)
        elif task_str.startswith(b'NEW_X'):
            # new iteration
            n_iterations += 1
            if callback is not None:
                callback(np.copy(x))

            if n_iterations >= maxiter:
                task[:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
            elif sf.nfev > maxfun:
                task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
                           'EXCEEDS LIMIT')
        else:
            break

    task_str = task.tobytes().strip(b'\x00').strip()
    if task_str.startswith(b'CONV'):
        warnflag = 0
    elif sf.nfev > maxfun or n_iterations >= maxiter:
        warnflag = 1
    else:
        warnflag = 2

    # These two portions of the workspace are described in the mainlb
    # subroutine in lbfgsb.f. See line 363.
    s = wa[0: m*n].reshape(m, n)
    y = wa[m*n: 2*m*n].reshape(m, n)

    # See lbfgsb.f line 160 for this portion of the workspace.
    # isave(31) = the total number of BFGS updates prior to the current
    # iteration
    n_bfgs_updates = isave[30]

    n_corrs = min(n_bfgs_updates, maxcor)
    hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])

    task_str = task_str.decode()
    return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
                          njev=sf.ngev,
                          nit=n_iterations, status=warnflag, message=task_str,
                          x=x, success=(warnflag == 0), hess_inv=hess_inv)


class LbfgsInvHessProduct(LinearOperator):
    """Linear operator for the L-BFGS approximate inverse Hessian.

    This operator computes the product of a vector with the approximate
    inverse of the Hessian of the objective function, using the L-BFGS
    limited memory approximation to the inverse Hessian, accumulated during
    the optimization.

    Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
    interface.

    Parameters
    ----------
    sk : array_like, shape=(n_corrs, n)
        Array of `n_corrs` most recent updates to the solution vector.
        (See [1]).
    yk : array_like, shape=(n_corrs, n)
        Array of `n_corrs` most recent updates to the gradient. (See [1]).

    References
    ----------
    .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
       storage." Mathematics of computation 35.151 (1980): 773-782.
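
    Examples
    --------
    A small usage sketch: the ``hess_inv`` attribute returned by an
    L-BFGS-B minimization is an instance of this class, and it can be
    applied to a vector like any other ``LinearOperator``.

    >>> import numpy as np
    >>> from scipy.optimize import minimize
    >>> res = minimize(lambda x: np.sum((x - 1) ** 2), np.zeros(3),
    ...                method='L-BFGS-B')
    >>> v = res.hess_inv.dot(np.ones(3))  # matrix-vector product
    >>> v.shape
    (3,)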

    """

    def __init__(self, sk, yk):
        """Construct the operator."""
        if sk.shape != yk.shape or sk.ndim != 2:
            raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
        n_corrs, n = sk.shape

        super().__init__(dtype=np.float64, shape=(n, n))

        self.sk = sk
        self.yk = yk
        self.n_corrs = n_corrs
        self.rho = 1 / np.einsum('ij,ij->i', sk, yk)

    def _matvec(self, x):
        """Efficient matrix-vector multiply with the BFGS matrices.

        This calculation is described in Section (4) of [1].

        Parameters
        ----------
        x : ndarray
            An array with shape (n,) or (n,1).

        Returns
        -------
        y : ndarray
            The matrix-vector product.

        """

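        # Standard L-BFGS two-loop recursion with the identity as the
        # initial inverse-Hessian approximation: the backward pass computes
        # the alpha coefficients, the forward pass assembles r = H @ x.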
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        q = np.array(x, dtype=self.dtype, copy=True)
        if q.ndim == 2 and q.shape[1] == 1:
            q = q.reshape(-1)

        alpha = np.empty(n_corrs)

        # backward pass, newest correction first
        for i in range(n_corrs-1, -1, -1):
            alpha[i] = rho[i] * np.dot(s[i], q)
            q = q - alpha[i]*y[i]

        # forward pass, oldest correction first
        r = q
        for i in range(n_corrs):
            beta = rho[i] * np.dot(y[i], r)
            r = r + s[i] * (alpha[i] - beta)

        return r

    def todense(self):
        """Return a dense array representation of this operator.

        Returns
        -------
        arr : ndarray, shape=(n, n)
            An array with the same shape and containing
            the same data represented by this `LinearOperator`.

        """

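        # Dense equivalent of _matvec: apply the recursive inverse-BFGS
        # update, starting from H_0 = I,
        #   H_{i+1} = (I - rho_i s_i y_i^T) H_i (I - rho_i y_i s_i^T)
        #             + rho_i s_i s_i^T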
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        I = np.eye(*self.shape, dtype=self.dtype)
        Hk = I

        for i in range(n_corrs):
            A1 = I - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
            A2 = I - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]

            Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *
                                               s[i][np.newaxis, :])
        return Hk