"""Hessian update strategies for quasi-Newton optimization methods."""
import numpy as np
from numpy.linalg import norm
from scipy.linalg import get_blas_funcs
from warnings import warn


__all__ = ['HessianUpdateStrategy', 'BFGS', 'SR1']


class HessianUpdateStrategy:
    """Interface for implementing Hessian update strategies.

    Many optimization methods make use of Hessian (or inverse Hessian)
    approximations, such as the quasi-Newton methods BFGS, SR1, and L-BFGS.
    Some of these approximations, however, do not actually need to store
    the entire matrix or can compute the internal matrix product with a
    given vector in a very efficient manner. This class serves as an
    abstract interface between the optimization algorithm and the
    quasi-Newton update strategies, giving freedom of implementation
    to store and update the internal matrix as efficiently as possible.
    Different choices of initialization and update procedure will result
    in different quasi-Newton strategies.

    Four methods should be implemented in derived classes: ``initialize``,
    ``update``, ``dot`` and ``get_matrix``.

    Notes
    -----
    Any instance of a class that implements this interface
    can be accepted by the method ``minimize`` and used by
    the compatible solvers to approximate the Hessian (or
    inverse Hessian) used by the optimization algorithms.
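
    Examples
    --------
    A minimal sketch of how a concrete strategy is meant to be used
    (``BFGS`` is one of the implementations below; ``rosen`` and
    ``rosen_der`` are SciPy's Rosenbrock test function and its gradient):

    >>> import numpy as np
    >>> from scipy.optimize import minimize, BFGS, rosen, rosen_der
    >>> res = minimize(rosen, np.array([2.0, 2.0]), method='trust-constr',
    ...                jac=rosen_der, hess=BFGS())
    >>> res.success
    True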

    """

    def initialize(self, n, approx_type):
        """Initialize internal matrix.

        Allocate internal memory for storing and updating
        the Hessian or its inverse.

        Parameters
        ----------
        n : int
            Problem dimension.
        approx_type : {'hess', 'inv_hess'}
            Selects either the Hessian or the inverse Hessian.
            When set to 'hess' the Hessian will be stored and updated.
            When set to 'inv_hess' its inverse will be used instead.
        """
        raise NotImplementedError("The method ``initialize(n, approx_type)``"
                                  " is not implemented.")

    def update(self, delta_x, delta_grad):
        """Update internal matrix.

        Update Hessian matrix or its inverse (depending on how 'approx_type'
        is defined) using information about the last evaluated points.

        Parameters
        ----------
        delta_x : ndarray
            The difference between two points the gradient
            function has been evaluated at: ``delta_x = x2 - x1``.
        delta_grad : ndarray
            The difference between the gradients:
            ``delta_grad = grad(x2) - grad(x1)``.
        """
        raise NotImplementedError("The method ``update(delta_x, delta_grad)``"
                                  " is not implemented.")

    def dot(self, p):
        """Compute the product of the internal matrix with the given vector.

        Parameters
        ----------
        p : array_like
            1-D array representing a vector.

        Returns
        -------
        Hp : array
            1-D array representing the result of multiplying the
            approximation matrix by vector ``p``.
        """
        raise NotImplementedError("The method ``dot(p)``"
                                  " is not implemented.")

    def get_matrix(self):
        """Return current internal matrix.

        Returns
        -------
        H : ndarray, shape (n, n)
            Dense matrix containing either the Hessian
            or its inverse (depending on how 'approx_type'
            is defined).
        """
        raise NotImplementedError("The method ``get_matrix()``"
                                  " is not implemented.")


class FullHessianUpdateStrategy(HessianUpdateStrategy):
    """Hessian update strategy with full dimensional internal representation.
    """
    _syr = get_blas_funcs('syr', dtype='d')  # Symmetric rank 1 update
    _syr2 = get_blas_funcs('syr2', dtype='d')  # Symmetric rank 2 update
    # Symmetric matrix-vector product
    _symv = get_blas_funcs('symv', dtype='d')

    def __init__(self, init_scale='auto'):
        self.init_scale = init_scale
        # Until initialize is called we can't really use the class,
        # so it makes sense to set everything to None.
        self.first_iteration = None
        self.approx_type = None
        self.B = None
        self.H = None

    def initialize(self, n, approx_type):
        """Initialize internal matrix.

        Allocate internal memory for storing and updating
        the Hessian or its inverse.

        Parameters
        ----------
        n : int
            Problem dimension.
        approx_type : {'hess', 'inv_hess'}
            Selects either the Hessian or the inverse Hessian.
            When set to 'hess' the Hessian will be stored and updated.
            When set to 'inv_hess' its inverse will be used instead.
        """
        self.first_iteration = True
        self.n = n
        self.approx_type = approx_type
        if approx_type not in ('hess', 'inv_hess'):
            raise ValueError("`approx_type` must be 'hess' or 'inv_hess'.")
        # Create matrix
        if self.approx_type == 'hess':
            self.B = np.eye(n, dtype=float)
        else:
            self.H = np.eye(n, dtype=float)

    def _auto_scale(self, delta_x, delta_grad):
        # Heuristic to scale matrix at first iteration.
        # Described in Nocedal and Wright "Numerical Optimization"
        # p.143 formula (6.20).
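        # For approx_type == 'hess' the returned scale is
        # (y^T y) / (y^T s) with s = delta_x and y = delta_grad, an
        # estimate of the curvature along the last step; for 'inv_hess'
        # its reciprocal is returned instead.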

        s_norm2 = np.dot(delta_x, delta_x)
        y_norm2 = np.dot(delta_grad, delta_grad)
        ys = np.abs(np.dot(delta_grad, delta_x))
        if ys == 0.0 or y_norm2 == 0 or s_norm2 == 0:
            return 1
        if self.approx_type == 'hess':
            return y_norm2 / ys
        else:
            return ys / y_norm2

    def _update_implementation(self, delta_x, delta_grad):
        raise NotImplementedError("The method ``_update_implementation``"
                                  " is not implemented.")

    def update(self, delta_x, delta_grad):
        """Update internal matrix.

        Update Hessian matrix or its inverse (depending on how 'approx_type'
        is defined) using information about the last evaluated points.

        Parameters
        ----------
        delta_x : ndarray
            The difference between two points the gradient
            function has been evaluated at: ``delta_x = x2 - x1``.
        delta_grad : ndarray
            The difference between the gradients:
            ``delta_grad = grad(x2) - grad(x1)``.
        """
        if np.all(delta_x == 0.0):
            return
        if np.all(delta_grad == 0.0):
            warn('delta_grad == 0.0. Check if the approximated '
                 'function is linear. If the function is linear '
                 'better results can be obtained by defining the '
                 'Hessian as zero instead of using quasi-Newton '
                 'approximations.', UserWarning)
            return
        if self.first_iteration:
            # Get user-specified scale
            if self.init_scale == "auto":
                scale = self._auto_scale(delta_x, delta_grad)
            else:
                scale = float(self.init_scale)
            # Scale initial matrix with ``scale * np.eye(n)``
            if self.approx_type == 'hess':
                self.B *= scale
            else:
                self.H *= scale
            self.first_iteration = False
        self._update_implementation(delta_x, delta_grad)

    def dot(self, p):
        """Compute the product of the internal matrix with the given vector.

        Parameters
        ----------
        p : array_like
            1-D array representing a vector.

        Returns
        -------
        Hp : array
            1-D array representing the result of multiplying the
            approximation matrix by vector ``p``.
        """
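        # Note: the BLAS ``symv`` routine reads only one triangle of the
        # stored array, and the rank-1/rank-2 updates used by subclasses
        # maintain only that triangle; ``get_matrix`` therefore
        # symmetrizes its copy before returning it.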

        if self.approx_type == 'hess':
            return self._symv(1, self.B, p)
        else:
            return self._symv(1, self.H, p)

    def get_matrix(self):
        """Return the current internal matrix.

        Returns
        -------
        M : ndarray, shape (n, n)
            Dense matrix containing either the Hessian or its inverse
            (depending on how `approx_type` was defined).
        """
        if self.approx_type == 'hess':
            M = np.copy(self.B)
        else:
            M = np.copy(self.H)
        li = np.tril_indices_from(M, k=-1)
        M[li] = M.T[li]
        return M


class BFGS(FullHessianUpdateStrategy):
    """Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.

    Parameters
    ----------
    exception_strategy : {'skip_update', 'damp_update'}, optional
        Define how to proceed when the curvature condition is violated.
        Set it to 'skip_update' to just skip the update. Or, alternatively,
        set it to 'damp_update' to interpolate between the actual BFGS
        result and the unmodified matrix. Both exception strategies
        are explained in [1]_, p.536-537.
    min_curvature : float, optional
        This number, scaled by a normalization factor, defines the
        minimum curvature ``dot(delta_grad, delta_x)`` allowed to go
        unaffected by the exception strategy. By default it is equal to
        1e-8 when ``exception_strategy = 'skip_update'`` and equal
        to 0.2 when ``exception_strategy = 'damp_update'``.
    init_scale : {float, 'auto'}, optional
        Matrix scale at first iteration. At the first
        iteration the Hessian matrix or its inverse will be initialized
        with ``init_scale*np.eye(n)``, where ``n`` is the problem dimension.
        Set it to 'auto' in order to use an automatic heuristic for choosing
        the initial scale. The heuristic is described in [1]_, p.143.
        By default uses 'auto'.

    Notes
    -----
    The update is based on the description in [1]_, p.140.

    References
    ----------
    .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
           Second Edition (2006).
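
    Examples
    --------
    A minimal sketch of driving the update by hand; the points and
    gradients are made-up values chosen so that the curvature
    ``dot(delta_grad, delta_x)`` is positive:

    >>> import numpy as np
    >>> from scipy.optimize import BFGS
    >>> hess = BFGS()
    >>> hess.initialize(n=2, approx_type='hess')
    >>> x1, x2 = np.array([1.0, 2.0]), np.array([1.1, 1.9])
    >>> g1, g2 = np.array([0.5, -0.3]), np.array([0.7, -0.4])
    >>> hess.update(x2 - x1, g2 - g1)
    >>> hess.get_matrix().shape
    (2, 2)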

    """

    def __init__(self, exception_strategy='skip_update', min_curvature=None,
                 init_scale='auto'):
        if exception_strategy == 'skip_update':
            if min_curvature is not None:
                self.min_curvature = min_curvature
            else:
                self.min_curvature = 1e-8
        elif exception_strategy == 'damp_update':
            if min_curvature is not None:
                self.min_curvature = min_curvature
            else:
                self.min_curvature = 0.2
        else:
            raise ValueError("`exception_strategy` must be 'skip_update' "
                             "or 'damp_update'.")

        super().__init__(init_scale)
        self.exception_strategy = exception_strategy

    def _update_inverse_hessian(self, ys, Hy, yHy, s):
        """Update the inverse Hessian matrix.

        BFGS update using the formula:

            ``H <- H + ((H*y).T*y + s.T*y)/(s.T*y)^2 * (s*s.T)
                     - 1/(s.T*y) * ((H*y)*s.T + s*(H*y).T)``

        where ``s = delta_x`` and ``y = delta_grad``. This formula is
        equivalent to (6.17) in [1]_ written in a more efficient way
        for implementation.

        References
        ----------
        .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
               Second Edition (2006).
        """
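        # In BLAS terms: ``syr2`` adds the symmetric rank-2 term
        # -(s*(H*y).T + (H*y)*s.T)/ys and ``syr`` adds the rank-1 term
        # ((ys + yHy)/ys**2)*(s*s.T), each updating a single triangle
        # of the stored array.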

        self.H = self._syr2(-1.0 / ys, s, Hy, a=self.H)
        self.H = self._syr((ys + yHy) / ys**2, s, a=self.H)

    def _update_hessian(self, ys, Bs, sBs, y):
        """Update the Hessian matrix.

        BFGS update using the formula:

            ``B <- B - (B*s)*(B*s).T/(s.T*(B*s)) + (y*y.T)/(s.T*y)``

        where ``s`` is short for ``delta_x`` and ``y`` is short
        for ``delta_grad``. Formula (6.19) in [1]_.

        References
        ----------
        .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
               Second Edition (2006).
        """
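        # Two symmetric rank-1 (``syr``) updates: the first adds
        # (y*y.T)/ys, the second subtracts (B*s)*(B*s).T/sBs.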

        self.B = self._syr(1.0 / ys, y, a=self.B)
        self.B = self._syr(-1.0 / sBs, Bs, a=self.B)

    def _update_implementation(self, delta_x, delta_grad):
        # Auxiliary variables w and z
        if self.approx_type == 'hess':
            w = delta_x
            z = delta_grad
        else:
            w = delta_grad
            z = delta_x
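        # The BFGS formulas for B (in terms of s, y) and for H (in terms
        # of y, s) have the same algebraic form, so a single code path is
        # reused with the roles of the two difference vectors swapped.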

        # Do some common operations
        wz = np.dot(w, z)
        Mw = self.dot(w)
        wMw = Mw.dot(w)
        # Guarantee that wMw > 0 by reinitializing matrix.
        # While this is always true in exact arithmetic,
        # an indefinite matrix may appear due to roundoff errors.
        if wMw <= 0.0:
            scale = self._auto_scale(delta_x, delta_grad)
            # Reinitialize matrix
            if self.approx_type == 'hess':
                self.B = scale * np.eye(self.n, dtype=float)
            else:
                self.H = scale * np.eye(self.n, dtype=float)
            # Do common operations for new matrix
            Mw = self.dot(w)
            wMw = Mw.dot(w)
        # Check if curvature condition is violated
        if wz <= self.min_curvature * wMw:
            # If the option 'skip_update' is set
            # we just skip the update when the condition
            # is violated.
            if self.exception_strategy == 'skip_update':
                return
            # If the option 'damp_update' is set we
            # interpolate between the actual BFGS
            # result and the unmodified matrix.
            elif self.exception_strategy == 'damp_update':
                update_factor = (1 - self.min_curvature) / (1 - wz / wMw)
                z = update_factor * z + (1 - update_factor) * Mw
                wz = np.dot(w, z)
        # Update matrix
        if self.approx_type == 'hess':
            self._update_hessian(wz, Mw, wMw, z)
        else:
            self._update_inverse_hessian(wz, Mw, wMw, z)



class SR1(FullHessianUpdateStrategy):
    """Symmetric-rank-1 Hessian update strategy.

    Parameters
    ----------
    min_denominator : float, optional
        This number, scaled by a normalization factor,
        defines the minimum denominator magnitude allowed
        in the update. When the condition is violated we skip
        the update. By default uses ``1e-8``.
    init_scale : {float, 'auto'}, optional
        Matrix scale at first iteration. At the first
        iteration the Hessian matrix or its inverse will be initialized
        with ``init_scale*np.eye(n)``, where ``n`` is the problem dimension.
        Set it to 'auto' in order to use an automatic heuristic for choosing
        the initial scale. The heuristic is described in [1]_, p.143.
        By default uses 'auto'.

    Notes
    -----
    The update is based on the description in [1]_, p.144-146.

    References
    ----------
    .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
           Second Edition (2006).
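
    Examples
    --------
    A minimal sketch of a single manual update; the step and gradient
    difference are made-up values:

    >>> import numpy as np
    >>> from scipy.optimize import SR1
    >>> hess = SR1()
    >>> hess.initialize(n=2, approx_type='hess')
    >>> hess.update(np.array([0.1, -0.1]), np.array([0.2, -0.1]))
    >>> hess.get_matrix().shape
    (2, 2)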

    """

    def __init__(self, min_denominator=1e-8, init_scale='auto'):
        self.min_denominator = min_denominator
        super().__init__(init_scale)
    def _update_implementation(self, delta_x, delta_grad):
        # Auxiliary variables w and z
        if self.approx_type == 'hess':
            w = delta_x
            z = delta_grad
        else:
            w = delta_grad
            z = delta_x
        # Do some common operations
        Mw = self.dot(w)
        z_minus_Mw = z - Mw
        denominator = np.dot(w, z_minus_Mw)
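        # Skip rule from [1]_ (cf. expression (6.26), p.146): apply the
        # rank-1 correction only when |w^T (z - M*w)| is large enough
        # relative to ||w||*||z - M*w||; otherwise the denominator is
        # considered numerically unsafe.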

        # If the denominator is too small
        # we just skip the update.
        if np.abs(denominator) <= (self.min_denominator
                                   * norm(w) * norm(z_minus_Mw)):
            return
        # Update matrix
        if self.approx_type == 'hess':
            self.B = self._syr(1 / denominator, z_minus_Mw, a=self.B)
        else:
            self.H = self._syr(1 / denominator, z_minus_Mw, a=self.H)