1"""Functions used by least-squares algorithms."""
2from math import copysign
4import numpy as np
5from numpy.linalg import norm
7from scipy.linalg import cho_factor, cho_solve, LinAlgError
8from scipy.sparse import issparse
9from scipy.sparse.linalg import LinearOperator, aslinearoperator
12EPS = np.finfo(float).eps


# Functions related to a trust-region problem.


def intersect_trust_region(x, s, Delta):
    """Find the intersection of a line with the boundary of a trust region.

    This function solves the quadratic equation with respect to t
    ||(x + s*t)||**2 = Delta**2.

    Returns
    -------
    t_neg, t_pos : tuple of float
        Negative and positive roots.

    Raises
    ------
    ValueError
        If `s` is zero or `x` is not within the trust region.
    """
    a = np.dot(s, s)
    if a == 0:
        raise ValueError("`s` is zero.")

    b = np.dot(x, s)

    c = np.dot(x, x) - Delta**2
    if c > 0:
        raise ValueError("`x` is not within the trust region.")

    d = np.sqrt(b*b - a*c)  # Root from one fourth of the discriminant.

    # Computations below avoid loss of significance, see "Numerical Recipes".
    q = -(b + copysign(d, b))
    t1 = q / a
    t2 = c / q

    if t1 < t2:
        return t1, t2
    else:
        return t2, t1
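

# Illustrative usage sketch (added for clarity, not part of the original
# SciPy module; the `_example_*` helper and all numeric values below are
# hypothetical). The line x + t*s crosses a trust region of radius 2 at one
# negative and one positive t, since x lies strictly inside the region.
def _example_intersect_trust_region():
    x = np.array([1.0, 0.0])
    s = np.array([1.0, 1.0])
    t_neg, t_pos = intersect_trust_region(x, s, Delta=2.0)
    return t_neg, t_pos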


def solve_lsq_trust_region(n, m, uf, s, V, Delta, initial_alpha=None,
                           rtol=0.01, max_iter=10):
    """Solve a trust-region problem arising in least-squares minimization.

    This function implements a method described by J. J. More [1]_ and used
    in MINPACK, but it relies on a single SVD of the Jacobian instead of a
    series of Cholesky decompositions. Before running this function, compute:
    ``U, s, VT = svd(J, full_matrices=False)``.

    Parameters
    ----------
    n : int
        Number of variables.
    m : int
        Number of residuals.
    uf : ndarray
        Computed as U.T.dot(f).
    s : ndarray
        Singular values of J.
    V : ndarray
        Transpose of VT.
    Delta : float
        Radius of a trust region.
    initial_alpha : float, optional
        Initial guess for alpha, which might be available from a previous
        iteration. If None, determined automatically.
    rtol : float, optional
        Stopping tolerance for the root-finding procedure. Namely, the
        solution ``p`` will satisfy ``abs(norm(p) - Delta) < rtol * Delta``.
    max_iter : int, optional
        Maximum allowed number of iterations for the root-finding procedure.

    Returns
    -------
    p : ndarray, shape (n,)
        Found solution of a trust-region problem.
    alpha : float
        Positive value such that (J.T*J + alpha*I)*p = -J.T*f.
        Sometimes called the Levenberg-Marquardt parameter.
    n_iter : int
        Number of iterations made by the root-finding procedure. Zero means
        that the Gauss-Newton step was selected as the solution.

    References
    ----------
    .. [1] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
           and Theory," Numerical Analysis, ed. G. A. Watson, Lecture Notes
           in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    """
    def phi_and_derivative(alpha, suf, s, Delta):
        """Function of which to find zero.

        It is defined as "norm of regularized (by alpha) least-squares
        solution minus `Delta`". Refer to [1]_.
        """
        denom = s**2 + alpha
        p_norm = norm(suf / denom)
        phi = p_norm - Delta
        phi_prime = -np.sum(suf ** 2 / denom**3) / p_norm
        return phi, phi_prime

    suf = s * uf

    # Check if J has full rank and try Gauss-Newton step.
    if m >= n:
        threshold = EPS * m * s[0]
        full_rank = s[-1] > threshold
    else:
        full_rank = False

    if full_rank:
        p = -V.dot(uf / s)
        if norm(p) <= Delta:
            return p, 0.0, 0

    alpha_upper = norm(suf) / Delta

    if full_rank:
        phi, phi_prime = phi_and_derivative(0.0, suf, s, Delta)
        alpha_lower = -phi / phi_prime
    else:
        alpha_lower = 0.0

    if initial_alpha is None or not full_rank and initial_alpha == 0:
        alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)
    else:
        alpha = initial_alpha

    for it in range(max_iter):
        if alpha < alpha_lower or alpha > alpha_upper:
            alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)

        phi, phi_prime = phi_and_derivative(alpha, suf, s, Delta)

        if phi < 0:
            alpha_upper = alpha

        ratio = phi / phi_prime
        alpha_lower = max(alpha_lower, alpha - ratio)
        alpha -= (phi + Delta) * ratio / Delta

        if np.abs(phi) < rtol * Delta:
            break

    p = -V.dot(suf / (s**2 + alpha))

    # Make the norm of p equal to Delta; p is changed only slightly during
    # this. It is done to prevent p from lying outside the trust region
    # (which can cause problems later).
    p *= Delta / norm(p)

    return p, alpha, it + 1
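

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): the documented pre-computation of the SVD of J and of U.T.dot(f),
# followed by a call that returns a step whose norm is close to Delta.
def _example_solve_lsq_trust_region():
    rng = np.random.default_rng(0)
    J = rng.standard_normal((5, 3))
    f = rng.standard_normal(5)
    U, s, VT = np.linalg.svd(J, full_matrices=False)
    p, alpha, n_iter = solve_lsq_trust_region(3, 5, U.T.dot(f), s, VT.T,
                                              Delta=0.1)
    return p, alpha, n_iter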


def solve_trust_region_2d(B, g, Delta):
    """Solve a general trust-region problem in 2 dimensions.

    The problem is reformulated as a 4th order algebraic equation,
    the solution of which is found by numpy.roots.

    Parameters
    ----------
    B : ndarray, shape (2, 2)
        Symmetric matrix, defines a quadratic term of the function.
    g : ndarray, shape (2,)
        Defines a linear term of the function.
    Delta : float
        Radius of a trust region.

    Returns
    -------
    p : ndarray, shape (2,)
        Found solution.
    newton_step : bool
        Whether the returned solution is the Newton step which lies within
        the trust region.
    """
    try:
        R, lower = cho_factor(B)
        p = -cho_solve((R, lower), g)
        if np.dot(p, p) <= Delta**2:
            return p, True
    except LinAlgError:
        pass

    a = B[0, 0] * Delta**2
    b = B[0, 1] * Delta**2
    c = B[1, 1] * Delta**2

    d = g[0] * Delta
    f = g[1] * Delta

    coeffs = np.array(
        [-b + d, 2 * (a - c + f), 6 * b, 2 * (-a + c + f), -b - d])
    t = np.roots(coeffs)  # Can handle leading zeros.
    t = np.real(t[np.isreal(t)])

    p = Delta * np.vstack((2 * t / (1 + t**2), (1 - t**2) / (1 + t**2)))
    value = 0.5 * np.sum(p * B.dot(p), axis=0) + np.dot(g, p)
    i = np.argmin(value)
    p = p[:, i]

    return p, False
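

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): minimizing 0.5 * p.T B p + g.T p over a disc of radius 1. When
# the unconstrained Newton step already lies inside the disc, newton_step
# is True; otherwise the boundary solution is returned.
def _example_solve_trust_region_2d():
    B = np.array([[2.0, 0.5],
                  [0.5, 1.0]])
    g = np.array([-1.0, 1.0])
    p, newton_step = solve_trust_region_2d(B, g, Delta=1.0)
    return p, newton_step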


def update_tr_radius(Delta, actual_reduction, predicted_reduction,
                     step_norm, bound_hit):
    """Update the radius of a trust region based on the cost reduction.

    Returns
    -------
    Delta : float
        New radius.
    ratio : float
        Ratio between actual and predicted reductions.
    """
    if predicted_reduction > 0:
        ratio = actual_reduction / predicted_reduction
    elif predicted_reduction == actual_reduction == 0:
        ratio = 1
    else:
        ratio = 0

    if ratio < 0.25:
        Delta = 0.25 * step_norm
    elif ratio > 0.75 and bound_hit:
        Delta *= 2.0

    return Delta, ratio
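

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): a step that hit the trust-region boundary with a reduction ratio
# above 0.75 doubles the radius, while a ratio below 0.25 would shrink the
# radius to 0.25 * step_norm.
def _example_update_tr_radius():
    return update_tr_radius(Delta=1.0, actual_reduction=0.9,
                            predicted_reduction=1.0, step_norm=1.0,
                            bound_hit=True)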


# Construction and minimization of quadratic functions.


def build_quadratic_1d(J, g, s, diag=None, s0=None):
    """Parameterize a multivariate quadratic function along a line.

    The resulting univariate quadratic function is given as follows::

        f(t) = 0.5 * (s0 + s*t).T * (J.T*J + diag) * (s0 + s*t) +
               g.T * (s0 + s*t)

    Parameters
    ----------
    J : ndarray, sparse matrix or LinearOperator, shape (m, n)
        Jacobian matrix, affects the quadratic term.
    g : ndarray, shape (n,)
        Gradient, defines the linear term.
    s : ndarray, shape (n,)
        Direction vector of a line.
    diag : None or ndarray with shape (n,), optional
        Additional diagonal part, affects the quadratic term.
        If None, assumed to be 0.
    s0 : None or ndarray with shape (n,), optional
        Initial point. If None, assumed to be 0.

    Returns
    -------
    a : float
        Coefficient for t**2.
    b : float
        Coefficient for t.
    c : float
        Free term. Returned only if `s0` is provided.
    """
    v = J.dot(s)
    a = np.dot(v, v)
    if diag is not None:
        a += np.dot(s * diag, s)
    a *= 0.5

    b = np.dot(g, s)

    if s0 is not None:
        u = J.dot(s0)
        b += np.dot(u, v)
        c = 0.5 * np.dot(u, u) + np.dot(g, s0)
        if diag is not None:
            b += np.dot(s0 * diag, s)
            c += 0.5 * np.dot(s0 * diag, s0)
        return a, b, c
    else:
        return a, b
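

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): restricting the least-squares model to the line x = t*s gives a
# univariate quadratic a*t**2 + b*t (the free term c is returned only when
# an initial point s0 is passed).
def _example_build_quadratic_1d():
    J = np.array([[1.0, 0.0],
                  [0.0, 2.0]])
    g = J.T.dot(np.array([1.0, -1.0]))  # gradient J.T.dot(f) for residuals f
    s = np.array([1.0, 1.0])            # direction of the line
    a, b = build_quadratic_1d(J, g, s)
    return a, b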


def minimize_quadratic_1d(a, b, lb, ub, c=0):
    """Minimize a 1-D quadratic function subject to bounds.

    The free term `c` is 0 by default. Bounds must be finite.

    Returns
    -------
    t : float
        Minimum point.
    y : float
        Minimum value.
    """
    t = [lb, ub]
    if a != 0:
        extremum = -0.5 * b / a
        if lb < extremum < ub:
            t.append(extremum)
    t = np.asarray(t)
    y = t * (a * t + b) + c
    min_index = np.argmin(y)
    return t[min_index], y[min_index]
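

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): minimizing 2.5*t**2 - t on [0, 1]. The unconstrained minimum
# t = 0.2 lies inside the interval, so it is returned together with the
# value -0.1; otherwise the better of the two endpoints would be chosen.
def _example_minimize_quadratic_1d():
    return minimize_quadratic_1d(2.5, -1.0, 0.0, 1.0)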


def evaluate_quadratic(J, g, s, diag=None):
    """Compute values of a quadratic function arising in least squares.

    The function is 0.5 * s.T * (J.T * J + diag) * s + g.T * s.

    Parameters
    ----------
    J : ndarray, sparse matrix or LinearOperator, shape (m, n)
        Jacobian matrix, affects the quadratic term.
    g : ndarray, shape (n,)
        Gradient, defines the linear term.
    s : ndarray, shape (k, n) or (n,)
        Array containing steps as rows.
    diag : ndarray, shape (n,), optional
        Additional diagonal part, affects the quadratic term.
        If None, assumed to be 0.

    Returns
    -------
    values : ndarray with shape (k,) or float
        Values of the function. If `s` was 2-D, then an ndarray is
        returned, otherwise, a float is returned.
    """
    if s.ndim == 1:
        Js = J.dot(s)
        q = np.dot(Js, Js)
        if diag is not None:
            q += np.dot(s * diag, s)
    else:
        Js = J.dot(s.T)
        q = np.sum(Js**2, axis=0)
        if diag is not None:
            q += np.sum(diag * s**2, axis=1)

    l = np.dot(s, g)

    return 0.5 * q + l
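

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): evaluating the model 0.5 * s.T (J.T J) s + g.T s for two
# candidate steps at once by stacking them as rows.
def _example_evaluate_quadratic():
    J = np.array([[1.0, 0.0],
                  [0.0, 2.0]])
    g = np.array([1.0, -2.0])
    steps = np.array([[1.0, 1.0],
                      [0.5, 0.0]])
    return evaluate_quadratic(J, g, steps)  # one value per row of `steps`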


# Utility functions to work with bound constraints.


def in_bounds(x, lb, ub):
    """Check if a point lies within bounds."""
    return np.all((x >= lb) & (x <= ub))


def step_size_to_bound(x, s, lb, ub):
    """Compute the minimum step size required to reach a bound.

    The function computes a positive scalar t, such that x + s * t is on
    the bound.

    Returns
    -------
    step : float
        Computed step. Non-negative value.
    hits : ndarray of int with shape of x
        Each element indicates whether a corresponding variable reaches the
        bound:

        * 0 - the bound was not hit.
        * -1 - the lower bound was hit.
        * 1 - the upper bound was hit.
    """
    non_zero = np.nonzero(s)
    s_non_zero = s[non_zero]
    steps = np.empty_like(x)
    steps.fill(np.inf)
    with np.errstate(over='ignore'):
        steps[non_zero] = np.maximum((lb - x)[non_zero] / s_non_zero,
                                     (ub - x)[non_zero] / s_non_zero)
    min_step = np.min(steps)
    return min_step, np.equal(steps, min_step) * np.sign(s).astype(int)
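

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): moving from x along s, the second variable reaches its upper
# bound first, so step = 0.5 and hits = [0, 1].
def _example_step_size_to_bound():
    x = np.array([0.0, 0.5])
    s = np.array([1.0, 1.0])
    lb = np.array([-1.0, -1.0])
    ub = np.array([2.0, 1.0])
    return step_size_to_bound(x, s, lb, ub)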


def find_active_constraints(x, lb, ub, rtol=1e-10):
    """Determine which constraints are active at a given point.

    The threshold is computed using `rtol` and the absolute value of the
    closest bound.

    Returns
    -------
    active : ndarray of int with shape of x
        Each component shows whether the corresponding constraint is active:

        * 0 - a constraint is not active.
        * -1 - a lower bound is active.
        * 1 - an upper bound is active.
    """
    active = np.zeros_like(x, dtype=int)

    if rtol == 0:
        active[x <= lb] = -1
        active[x >= ub] = 1
        return active

    lower_dist = x - lb
    upper_dist = ub - x

    lower_threshold = rtol * np.maximum(1, np.abs(lb))
    upper_threshold = rtol * np.maximum(1, np.abs(ub))

    lower_active = (np.isfinite(lb) &
                    (lower_dist <= np.minimum(upper_dist, lower_threshold)))
    active[lower_active] = -1

    upper_active = (np.isfinite(ub) &
                    (upper_dist <= np.minimum(lower_dist, upper_threshold)))
    active[upper_active] = 1

    return active


def make_strictly_feasible(x, lb, ub, rstep=1e-10):
    """Shift a point to the interior of a feasible region.

    Each element of the returned vector is at least at a relative distance
    `rstep` from the closest bound. If ``rstep=0`` then `np.nextafter` is
    used.
    """
    x_new = x.copy()

    active = find_active_constraints(x, lb, ub, rstep)
    lower_mask = np.equal(active, -1)
    upper_mask = np.equal(active, 1)

    if rstep == 0:
        x_new[lower_mask] = np.nextafter(lb[lower_mask], ub[lower_mask])
        x_new[upper_mask] = np.nextafter(ub[upper_mask], lb[upper_mask])
    else:
        x_new[lower_mask] = (lb[lower_mask] +
                             rstep * np.maximum(1, np.abs(lb[lower_mask])))
        x_new[upper_mask] = (ub[upper_mask] -
                             rstep * np.maximum(1, np.abs(ub[upper_mask])))

    tight_bounds = (x_new < lb) | (x_new > ub)
    x_new[tight_bounds] = 0.5 * (lb[tight_bounds] + ub[tight_bounds])

    return x_new
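

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): the first component sits exactly on its lower bound and the
# second violates its upper bound; both are nudged into the interior while
# the already strictly feasible third component is left unchanged.
def _example_make_strictly_feasible():
    x = np.array([0.0, 5.0, 1.0])
    lb = np.array([0.0, -np.inf, 0.0])
    ub = np.array([1.0, 4.0, 2.0])
    return make_strictly_feasible(x, lb, ub, rstep=1e-10)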


def CL_scaling_vector(x, g, lb, ub):
    """Compute Coleman-Li scaling vector and its derivatives.

    Components of a vector v are defined as follows::

               | ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf
        v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf
               | 1,            otherwise

    According to this definition v[i] >= 0 for all i. It differs from the
    definition in paper [1]_ (eq. (2.2)), where the absolute value of v is
    used. Both definitions are equivalent down the line.
    Derivatives of v with respect to x take value 1, -1 or 0 depending on a
    case.

    Returns
    -------
    v : ndarray with shape of x
        Scaling vector.
    dv : ndarray with shape of x
        Derivatives of v[i] with respect to x[i], diagonal elements of v's
        Jacobian.

    References
    ----------
    .. [1] M.A. Branch, T.F. Coleman, and Y. Li, "A Subspace, Interior,
           and Conjugate Gradient Method for Large-Scale Bound-Constrained
           Minimization Problems," SIAM Journal on Scientific Computing,
           Vol. 21, Number 1, pp 1-23, 1999.
    """
    v = np.ones_like(x)
    dv = np.zeros_like(x)

    mask = (g < 0) & np.isfinite(ub)
    v[mask] = ub[mask] - x[mask]
    dv[mask] = -1

    mask = (g > 0) & np.isfinite(lb)
    v[mask] = x[mask] - lb[mask]
    dv[mask] = 1

    return v, dv
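

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): for a negative gradient component with a finite upper bound,
# v[i] is the distance to that bound (dv[i] = -1); for a positive gradient
# component with a finite lower bound, it is the distance to the lower
# bound (dv[i] = 1).
def _example_CL_scaling_vector():
    x = np.array([0.5, 0.5])
    g = np.array([1.0, -1.0])
    lb = np.array([0.0, 0.0])
    ub = np.array([np.inf, 1.0])
    return CL_scaling_vector(x, g, lb, ub)  # v = [0.5, 0.5], dv = [1, -1]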


def reflective_transformation(y, lb, ub):
    """Compute reflective transformation and its gradient."""
    if in_bounds(y, lb, ub):
        return y, np.ones_like(y)

    lb_finite = np.isfinite(lb)
    ub_finite = np.isfinite(ub)

    x = y.copy()
    g_negative = np.zeros_like(y, dtype=bool)

    mask = lb_finite & ~ub_finite
    x[mask] = np.maximum(y[mask], 2 * lb[mask] - y[mask])
    g_negative[mask] = y[mask] < lb[mask]

    mask = ~lb_finite & ub_finite
    x[mask] = np.minimum(y[mask], 2 * ub[mask] - y[mask])
    g_negative[mask] = y[mask] > ub[mask]

    mask = lb_finite & ub_finite
    d = ub - lb
    t = np.remainder(y[mask] - lb[mask], 2 * d[mask])
    x[mask] = lb[mask] + np.minimum(t, 2 * d[mask] - t)
    g_negative[mask] = t > d[mask]

    g = np.ones_like(y)
    g[g_negative] = -1

    return x, g
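

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): a point outside the box [0, 1] is folded back inside, and the
# gradient sign g = -1 records that the transformation is decreasing there.
def _example_reflective_transformation():
    lb = np.array([0.0])
    ub = np.array([1.0])
    y = np.array([1.3])
    return reflective_transformation(y, lb, ub)  # x = [0.7], g = [-1.0]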


# Functions to display algorithm's progress.


def print_header_nonlinear():
    print("{0:^15}{1:^15}{2:^15}{3:^15}{4:^15}{5:^15}"
          .format("Iteration", "Total nfev", "Cost", "Cost reduction",
                  "Step norm", "Optimality"))


def print_iteration_nonlinear(iteration, nfev, cost, cost_reduction,
                              step_norm, optimality):
    if cost_reduction is None:
        cost_reduction = " " * 15
    else:
        cost_reduction = "{0:^15.2e}".format(cost_reduction)

    if step_norm is None:
        step_norm = " " * 15
    else:
        step_norm = "{0:^15.2e}".format(step_norm)

    print("{0:^15}{1:^15}{2:^15.4e}{3}{4}{5:^15.2e}"
          .format(iteration, nfev, cost, cost_reduction,
                  step_norm, optimality))


def print_header_linear():
    print("{0:^15}{1:^15}{2:^15}{3:^15}{4:^15}"
          .format("Iteration", "Cost", "Cost reduction", "Step norm",
                  "Optimality"))


def print_iteration_linear(iteration, cost, cost_reduction, step_norm,
                           optimality):
    if cost_reduction is None:
        cost_reduction = " " * 15
    else:
        cost_reduction = "{0:^15.2e}".format(cost_reduction)

    if step_norm is None:
        step_norm = " " * 15
    else:
        step_norm = "{0:^15.2e}".format(step_norm)

    print("{0:^15}{1:^15.4e}{2}{3}{4:^15.2e}".format(
        iteration, cost, cost_reduction, step_norm, optimality))


# Simple helper functions.


def compute_grad(J, f):
    """Compute gradient of the least-squares cost function."""
    if isinstance(J, LinearOperator):
        return J.rmatvec(f)
    else:
        return J.T.dot(f)


def compute_jac_scale(J, scale_inv_old=None):
    """Compute variable scales based on the Jacobian matrix."""
    if issparse(J):
        scale_inv = np.asarray(J.power(2).sum(axis=0)).ravel()**0.5
    else:
        scale_inv = np.sum(J**2, axis=0)**0.5

    if scale_inv_old is None:
        scale_inv[scale_inv == 0] = 1
    else:
        scale_inv = np.maximum(scale_inv, scale_inv_old)

    return 1 / scale_inv, scale_inv


def left_multiplied_operator(J, d):
    """Return diag(d) J as LinearOperator."""
    J = aslinearoperator(J)

    def matvec(x):
        return d * J.matvec(x)

    def matmat(X):
        return d[:, np.newaxis] * J.matmat(X)

    def rmatvec(x):
        return J.rmatvec(x.ravel() * d)

    return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
                          rmatvec=rmatvec)


def right_multiplied_operator(J, d):
    """Return J diag(d) as LinearOperator."""
    J = aslinearoperator(J)

    def matvec(x):
        return J.matvec(np.ravel(x) * d)

    def matmat(X):
        return J.matmat(X * d[:, np.newaxis])

    def rmatvec(x):
        return d * J.rmatvec(x)

    return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
                          rmatvec=rmatvec)


def regularized_lsq_operator(J, diag):
    """Return a matrix arising in regularized least squares as LinearOperator.

    The matrix is
        [ J ]
        [ D ]
    where D is a diagonal matrix with elements from `diag`.
    """
    J = aslinearoperator(J)
    m, n = J.shape

    def matvec(x):
        return np.hstack((J.matvec(x), diag * x))

    def rmatvec(x):
        x1 = x[:m]
        x2 = x[m:]
        return J.rmatvec(x1) + diag * x2

    return LinearOperator((m + n, n), matvec=matvec, rmatvec=rmatvec)
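

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): the returned operator behaves like the stacked matrix [J; D], so
# its matvec is the concatenation of J.dot(x) and diag * x.
def _example_regularized_lsq_operator():
    J = np.array([[1.0, 0.0],
                  [0.0, 2.0],
                  [1.0, 1.0]])
    diag = np.array([0.1, 0.1])
    A = regularized_lsq_operator(J, diag)
    return A.matvec(np.array([1.0, 1.0]))  # shape (m + n,) = (5,)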


def right_multiply(J, d, copy=True):
    """Compute J diag(d).

    If `copy` is False, `J` is modified in place (unless it is a
    LinearOperator).
    """
    if copy and not isinstance(J, LinearOperator):
        J = J.copy()

    if issparse(J):
        J.data *= d.take(J.indices, mode='clip')  # scikit-learn recipe.
    elif isinstance(J, LinearOperator):
        J = right_multiplied_operator(J, d)
    else:
        J *= d

    return J


def left_multiply(J, d, copy=True):
    """Compute diag(d) J.

    If `copy` is False, `J` is modified in place (unless it is a
    LinearOperator).
    """
    if copy and not isinstance(J, LinearOperator):
        J = J.copy()

    if issparse(J):
        J.data *= np.repeat(d, np.diff(J.indptr))  # scikit-learn recipe.
    elif isinstance(J, LinearOperator):
        J = left_multiplied_operator(J, d)
    else:
        J *= d[:, np.newaxis]

    return J


def check_termination(dF, F, dx_norm, x_norm, ratio, ftol, xtol):
    """Check termination condition for nonlinear least squares."""
    ftol_satisfied = dF < ftol * F and ratio > 0.25
    xtol_satisfied = dx_norm < xtol * (xtol + x_norm)

    if ftol_satisfied and xtol_satisfied:
        return 4
    elif ftol_satisfied:
        return 2
    elif xtol_satisfied:
        return 3
    else:
        return None
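

# Illustrative usage sketch (hypothetical helper, not part of the original
# module): a relative cost reduction below ftol with an acceptable step
# ratio satisfies the ftol condition alone, so status 2 is returned
# (3 for xtol only, 4 for both, None for neither).
def _example_check_termination():
    return check_termination(dF=1e-12, F=1.0, dx_norm=1e-3, x_norm=1.0,
                             ratio=0.5, ftol=1e-8, xtol=1e-8)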


def scale_for_robust_loss_function(J, f, rho):
    """Scale Jacobian and residuals for a robust loss function.

    Arrays are modified in place.
    """
    J_scale = rho[1] + 2 * rho[2] * f**2
    J_scale[J_scale < EPS] = EPS
    J_scale **= 0.5

    f *= rho[1] / J_scale

    return left_multiply(J, J_scale, copy=False), f