/src/quantlib/ql/math/optimization/lmdif.cpp
Line | Count | Source |
1 | | /************************lmdif*************************/ |
2 | | |
3 | | /* |
4 | | The original Fortran version is Copyright (C) 1999 University of Chicago. |
5 | | All rights reserved. |
6 | | |
7 | | Redistribution and use in source and binary forms, with or |
8 | | without modification, are permitted provided that the |
9 | | following conditions are met: |
10 | | |
11 | | 1. Redistributions of source code must retain the above |
12 | | copyright notice, this list of conditions and the following |
13 | | disclaimer. |
14 | | |
15 | | 2. Redistributions in binary form must reproduce the above |
16 | | copyright notice, this list of conditions and the following |
17 | | disclaimer in the documentation and/or other materials |
18 | | provided with the distribution. |
19 | | |
20 | | 3. The end-user documentation included with the |
21 | | redistribution, if any, must include the following |
22 | | acknowledgment: |
23 | | |
24 | | "This product includes software developed by the |
25 | | University of Chicago, as Operator of Argonne National |
26 | | Laboratory. |
27 | | |
28 | | Alternately, this acknowledgment may appear in the software |
29 | | itself, if and wherever such third-party acknowledgments |
30 | | normally appear. |
31 | | |
32 | | 4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" |
33 | | WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE |
34 | | UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND |
35 | | THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR |
36 | | IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES |
37 | | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE |
38 | | OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY |
39 | | OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR |
40 | | USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF |
41 | | THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) |
42 | | DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION |
43 | | UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL |
44 | | BE CORRECTED. |
45 | | |
46 | | 5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT |
47 | | HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF |
48 | | ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, |
49 | | INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF |
50 | | ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF |
51 | | PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER |
52 | | SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT |
53 | | (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, |
54 | | EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE |
55 | | POSSIBILITY OF SUCH LOSS OR DAMAGES. |
56 | | |
57 | | |
58 | | C translation Copyright (C) Steve Moshier |
59 | | |
60 | | What you see here may be used freely but it comes with no support |
61 | | or guarantee. |
62 | | */ |
63 | | |
64 | | #include <ql/math/optimization/lmdif.hpp> |
65 | | #include <cmath> |
66 | | #include <cstdio> |
67 | | |
68 | | namespace QuantLib::MINPACK { |
69 | | #define BUG 0 |
70 | | /* resolution of arithmetic */ |
71 | | double MACHEP = 1.2e-16; |
72 | | /* smallest nonzero number */ |
73 | | double DWARF = 1.0e-38; |
74 | | |
75 | | |
76 | | |
77 | | |
78 | | |
79 | | Real enorm(int n,Real* x) |
80 | 0 | { |
81 | | /* |
82 | | * ********** |
83 | | * |
84 | | * function enorm |
85 | | * |
86 | | * given an n-vector x, this function calculates the |
87 | | * euclidean norm of x. |
88 | | * |
89 | | * the euclidean norm is computed by accumulating the sum of |
90 | | * squares in three different sums. the sums of squares for the |
91 | | * small and large components are scaled so that no overflows |
92 | | * occur. non-destructive underflows are permitted. underflows |
93 | | * and overflows do not occur in the computation of the unscaled |
94 | | * sum of squares for the intermediate components. |
95 | | * the definitions of small, intermediate and large components |
96 | | * depend on two constants, rdwarf and rgiant. the main |
97 | | * restrictions on these constants are that rdwarf**2 not |
98 | | * underflow and rgiant**2 not overflow. the constants |
99 | | * given here are suitable for every known computer. |
100 | | * |
101 | | * the function statement is |
102 | | * |
103 | | * double precision function enorm(n,x) |
104 | | * |
105 | | * where |
106 | | * |
107 | | * n is a positive integer input variable. |
108 | | * |
109 | | * x is an input array of length n. |
110 | | * |
111 | | * subprograms called |
112 | | * |
113 | | * fortran-supplied ... dabs,dsqrt |
114 | | * |
115 | | * argonne national laboratory. minpack project. march 1980. |
116 | | * burton s. garbow, kenneth e. hillstrom, jorge j. more |
117 | | * |
118 | | * ********** |
119 | | */ |
120 | 0 | int i; |
121 | 0 | Real agiant,floatn,s1,s2,s3,xabs,x1max,x3max; |
122 | 0 | Real ans, temp; |
123 | 0 | static double rdwarf = 3.834e-20; |
124 | 0 | static double rgiant = 1.304e19; |
125 | 0 | static double zero = 0.0; |
126 | 0 | static double one = 1.0; |
127 | |
|
128 | 0 | s1 = zero; |
129 | 0 | s2 = zero; |
130 | 0 | s3 = zero; |
131 | 0 | x1max = zero; |
132 | 0 | x3max = zero; |
133 | 0 | floatn = n; |
134 | 0 | agiant = rgiant/floatn; |
135 | |
|
136 | 0 | for( i=0; i<n; i++ ) |
137 | 0 | { |
138 | 0 | xabs = std::fabs(x[i]); |
139 | 0 | if( (xabs > rdwarf) && (xabs < agiant) ) |
140 | 0 | { |
141 | | /* |
142 | | * sum for intermediate components. |
143 | | */ |
144 | 0 | s2 += xabs*xabs; |
145 | 0 | continue; |
146 | 0 | } |
147 | | |
148 | 0 | if(xabs > rdwarf) |
149 | 0 | { |
150 | | /* |
151 | | * sum for large components. |
152 | | */ |
153 | 0 | if(xabs > x1max) |
154 | 0 | { |
155 | 0 | temp = x1max/xabs; |
156 | 0 | s1 = one + s1*temp*temp; |
157 | 0 | x1max = xabs; |
158 | 0 | } |
159 | 0 | else |
160 | 0 | { |
161 | 0 | temp = xabs/x1max; |
162 | 0 | s1 += temp*temp; |
163 | 0 | } |
164 | 0 | continue; |
165 | 0 | } |
166 | | /* |
167 | | * sum for small components. |
168 | | */ |
169 | 0 | if(xabs > x3max) |
170 | 0 | { |
171 | 0 | temp = x3max/xabs; |
172 | 0 | s3 = one + s3*temp*temp; |
173 | 0 | x3max = xabs; |
174 | 0 | } |
175 | 0 | else |
176 | 0 | { |
177 | 0 | if(xabs != zero) |
178 | 0 | { |
179 | 0 | temp = xabs/x3max; |
180 | 0 | s3 += temp*temp; |
181 | 0 | } |
182 | 0 | } |
183 | 0 | } |
184 | | /* |
185 | | * calculation of norm. |
186 | | */ |
187 | 0 | if(s1 != zero) |
188 | 0 | { |
189 | 0 | temp = s1 + (s2/x1max)/x1max; |
190 | 0 | ans = x1max*std::sqrt(temp); |
191 | 0 | return(ans); |
192 | 0 | } |
193 | 0 | if(s2 != zero) |
194 | 0 | { |
195 | 0 | if(s2 >= x3max) |
196 | 0 | temp = s2*(one+(x3max/s2)*(x3max*s3)); |
197 | 0 | else |
198 | 0 | temp = x3max*((s2/x3max)+(x3max*s3)); |
199 | 0 | ans = std::sqrt(temp); |
200 | 0 | } |
201 | 0 | else |
202 | 0 | { |
203 | 0 | ans = x3max*std::sqrt(s3); |
204 | 0 | } |
205 | 0 | return(ans); |
206 | | /* |
207 | | * last card of function enorm. |
208 | | */ |
209 | 0 | } |
210 | | /************************lmmisc.c*************************/ |
211 | | |
212 | | Real dmax1(Real a,Real b) |
213 | 0 | { |
214 | 0 | if( a >= b ) |
215 | 0 | return(a); |
216 | 0 | else |
217 | 0 | return(b); |
218 | 0 | } |
219 | | |
220 | | Real dmin1(Real a,Real b) |
221 | 0 | { |
222 | 0 | if( a <= b ) |
223 | 0 | return(a); |
224 | 0 | else |
225 | 0 | return(b); |
226 | 0 | } |
227 | | |
228 | | int min0(int a,int b) |
229 | | |
230 | 0 | { |
231 | 0 | if( a <= b ) |
232 | 0 | return(a); |
233 | 0 | else |
234 | 0 | return(b); |
235 | 0 | } |
236 | | |
237 | | int mod( int k, int m ) |
238 | 0 | { |
239 | 0 | return( k % m ); |
240 | 0 | } |
241 | | |
242 | | |
243 | | /***********Sample of user supplied function**************** |
244 | | * m = number of functions |
245 | | * n = number of variables |
246 | | * x = vector of function arguments |
247 | | * fvec = vector of function values |
248 | | * iflag = error return variable |
249 | | */ |
250 | | //void fcn(int m,int n, Real* x, Real* fvec,int *iflag) |
251 | | //{ |
252 | | // QuantLib::LevenbergMarquardt::fcn(m, n, x, fvec, iflag); |
253 | | //} |
254 | | |
255 | | void fdjac2(int m, |
256 | | int n, |
257 | | Real* x, |
258 | | const Real* fvec, |
259 | | Real* fjac, |
260 | | int, |
261 | | int* iflag, |
262 | | Real epsfcn, |
263 | | Real* wa, |
264 | 0 | const QuantLib::MINPACK::LmdifCostFunction& fcn) { |
265 | | /* |
266 | | * ********** |
267 | | * |
268 | | * subroutine fdjac2 |
269 | | * |
270 | | * this subroutine computes a forward-difference approximation |
271 | | * to the m by n jacobian matrix associated with a specified |
272 | | * problem of m functions in n variables. |
273 | | * |
274 | | * the subroutine statement is |
275 | | * |
276 | | * subroutine fdjac2(fcn,m,n,x,fvec,fjac,ldfjac,iflag,epsfcn,wa) |
277 | | * |
278 | | * where |
279 | | * |
280 | | * fcn is the name of the user-supplied subroutine which |
281 | | * calculates the functions. fcn must be declared |
282 | | * in an external statement in the user calling |
283 | | * program, and should be written as follows. |
284 | | * |
285 | | * subroutine fcn(m,n,x,fvec,iflag) |
286 | | * integer m,n,iflag |
287 | | * double precision x(n),fvec(m) |
288 | | * ---------- |
289 | | * calculate the functions at x and |
290 | | * return this vector in fvec. |
291 | | * ---------- |
292 | | * return |
293 | | * end |
294 | | * |
295 | | * the value of iflag should not be changed by fcn unless |
296 | | * the user wants to terminate execution of fdjac2. |
297 | | * in this case set iflag to a negative integer. |
298 | | * |
299 | | * m is a positive integer input variable set to the number |
300 | | * of functions. |
301 | | * |
302 | | * n is a positive integer input variable set to the number |
303 | | * of variables. n must not exceed m. |
304 | | * |
305 | | * x is an input array of length n. |
306 | | * |
307 | | * fvec is an input array of length m which must contain the |
308 | | * functions evaluated at x. |
309 | | * |
310 | | * fjac is an output m by n array which contains the |
311 | | * approximation to the jacobian matrix evaluated at x. |
312 | | * |
313 | | * ldfjac is a positive integer input variable not less than m |
314 | | * which specifies the leading dimension of the array fjac. |
315 | | * |
316 | | * iflag is an integer variable which can be used to terminate |
317 | | * the execution of fdjac2. see description of fcn. |
318 | | * |
319 | | * epsfcn is an input variable used in determining a suitable |
320 | | * step length for the forward-difference approximation. this |
321 | | * approximation assumes that the relative errors in the |
322 | | * functions are of the order of epsfcn. if epsfcn is less |
323 | | * than the machine precision, it is assumed that the relative |
324 | | * errors in the functions are of the order of the machine |
325 | | * precision. |
326 | | * |
327 | | * wa is a work array of length m. |
328 | | * |
329 | | * subprograms called |
330 | | * |
331 | | * user-supplied ...... fcn |
332 | | * |
333 | | * minpack-supplied ... dpmpar |
334 | | * |
335 | | * fortran-supplied ... dabs,dmax1,dsqrt |
336 | | * |
337 | | * argonne national laboratory. minpack project. march 1980. |
338 | | * burton s. garbow, kenneth e. hillstrom, jorge j. more |
339 | | * |
340 | | ********** |
341 | | */ |
342 | 0 | int i, j, ij; |
343 | 0 | Real eps, h, temp; |
344 | 0 | static double zero = 0.0; |
345 | | |
346 | |
|
347 | 0 | temp = dmax1(epsfcn, MACHEP); |
348 | 0 | eps = std::sqrt(temp); |
349 | 0 | ij = 0; |
350 | 0 | for (j = 0; j < n; j++) { |
351 | 0 | temp = x[j]; |
352 | 0 | h = eps * std::fabs(temp); |
353 | 0 | if (h == zero) |
354 | 0 | h = eps; |
355 | 0 | x[j] = temp + h; |
356 | 0 | fcn(m, n, x, wa, iflag); |
357 | 0 | if (*iflag < 0) |
358 | 0 | return; |
359 | 0 | x[j] = temp; |
360 | 0 | for (i = 0; i < m; i++) { |
361 | 0 | fjac[ij] = (wa[i] - fvec[i]) / h; |
362 | 0 | ij += 1; /* fjac[i+m*j] */ |
363 | 0 | } |
364 | 0 | } |
365 | | /* |
366 | | * last card of subroutine fdjac2. |
367 | | */ |
368 | 0 | } |
369 | | /************************qrfac.c*************************/ |
370 | | |
371 | | |
372 | | void |
373 | | qrfac(int m,int n,Real* a,int,int pivot,int* ipvt, |
374 | | int,Real* rdiag,Real* acnorm,Real* wa) |
375 | 0 | { |
376 | | /* |
377 | | * ********** |
378 | | * |
379 | | * subroutine qrfac |
380 | | * |
381 | | * this subroutine uses householder transformations with column |
382 | | * pivoting (optional) to compute a qr factorization of the |
383 | | * m by n matrix a. that is, qrfac determines an orthogonal |
384 | | * matrix q, a permutation matrix p, and an upper trapezoidal |
385 | | * matrix r with diagonal elements of nonincreasing magnitude, |
386 | | * such that a*p = q*r. the householder transformation for |
387 | | * column k, k = 1,2,...,min(m,n), is of the form |
388 | | * |
389 | | * t |
390 | | * i - (1/u(k))*u*u |
391 | | * |
392 | | * where u has zeros in the first k-1 positions. the form of |
393 | | * this transformation and the method of pivoting first |
394 | | * appeared in the corresponding linpack subroutine. |
395 | | * |
396 | | * the subroutine statement is |
397 | | * |
398 | | * subroutine qrfac(m,n,a,lda,pivot,ipvt,lipvt,rdiag,acnorm,wa) |
399 | | * |
400 | | * where |
401 | | * |
402 | | * m is a positive integer input variable set to the number |
403 | | * of rows of a. |
404 | | * |
405 | | * n is a positive integer input variable set to the number |
406 | | * of columns of a. |
407 | | * |
408 | | * a is an m by n array. on input a contains the matrix for |
409 | | * which the qr factorization is to be computed. on output |
410 | | * the strict upper trapezoidal part of a contains the strict |
411 | | * upper trapezoidal part of r, and the lower trapezoidal |
412 | | * part of a contains a factored form of q (the non-trivial |
413 | | * elements of the u vectors described above). |
414 | | * |
415 | | * lda is a positive integer input variable not less than m |
416 | | * which specifies the leading dimension of the array a. |
417 | | * |
418 | | * pivot is a logical input variable. if pivot is set true, |
419 | | * then column pivoting is enforced. if pivot is set false, |
420 | | * then no column pivoting is done. |
421 | | * |
422 | | * ipvt is an integer output array of length lipvt. ipvt |
423 | | * defines the permutation matrix p such that a*p = q*r. |
424 | | * column j of p is column ipvt(j) of the identity matrix. |
425 | | * if pivot is false, ipvt is not referenced. |
426 | | * |
427 | | * lipvt is a positive integer input variable. if pivot is false, |
428 | | * then lipvt may be as small as 1. if pivot is true, then |
429 | | * lipvt must be at least n. |
430 | | * |
431 | | * rdiag is an output array of length n which contains the |
432 | | * diagonal elements of r. |
433 | | * |
434 | | * acnorm is an output array of length n which contains the |
435 | | * norms of the corresponding columns of the input matrix a. |
436 | | * if this information is not needed, then acnorm can coincide |
437 | | * with rdiag. |
438 | | * |
439 | | * wa is a work array of length n. if pivot is false, then wa |
440 | | * can coincide with rdiag. |
441 | | * |
442 | | * subprograms called |
443 | | * |
444 | | * minpack-supplied ... dpmpar,enorm |
445 | | * |
446 | | * fortran-supplied ... dmax1,dsqrt,min0 |
447 | | * |
448 | | * argonne national laboratory. minpack project. march 1980. |
449 | | * burton s. garbow, kenneth e. hillstrom, jorge j. more |
450 | | * |
451 | | * ********** |
452 | | */ |
453 | 0 | int i,ij,jj,j,jp1,k,kmax,minmn; |
454 | 0 | Real ajnorm,sum,temp; |
455 | 0 | static double zero = 0.0; |
456 | 0 | static double one = 1.0; |
457 | 0 | static double p05 = 0.05; |
458 | | |
459 | | /* |
460 | | * compute the initial column norms and initialize several arrays. |
461 | | */ |
462 | 0 | ij = 0; |
463 | 0 | for( j=0; j<n; j++ ) |
464 | 0 | { |
465 | 0 | acnorm[j] = enorm(m,&a[ij]); |
466 | 0 | rdiag[j] = acnorm[j]; |
467 | 0 | wa[j] = rdiag[j]; |
468 | 0 | if(pivot != 0) |
469 | 0 | ipvt[j] = j; |
470 | 0 | ij += m; /* m*j */ |
471 | 0 | } |
472 | | /* |
473 | | * reduce a to r with householder transformations. |
474 | | */ |
475 | 0 | minmn = min0(m,n); |
476 | 0 | for( j=0; j<minmn; j++ ) |
477 | 0 | { |
478 | 0 | if(pivot == 0) |
479 | 0 | goto L40; |
480 | | /* |
481 | | * bring the column of largest norm into the pivot position. |
482 | | */ |
483 | 0 | kmax = j; |
484 | 0 | for( k=j; k<n; k++ ) |
485 | 0 | { |
486 | 0 | if(rdiag[k] > rdiag[kmax]) |
487 | 0 | kmax = k; |
488 | 0 | } |
489 | 0 | if(kmax == j) |
490 | 0 | goto L40; |
491 | | |
492 | 0 | ij = m * j; |
493 | 0 | jj = m * kmax; |
494 | 0 | for( i=0; i<m; i++ ) |
495 | 0 | { |
496 | 0 | temp = a[ij]; /* [i+m*j] */ |
497 | 0 | a[ij] = a[jj]; /* [i+m*kmax] */ |
498 | 0 | a[jj] = temp; |
499 | 0 | ij += 1; |
500 | 0 | jj += 1; |
501 | 0 | } |
502 | 0 | rdiag[kmax] = rdiag[j]; |
503 | 0 | wa[kmax] = wa[j]; |
504 | 0 | k = ipvt[j]; |
505 | 0 | ipvt[j] = ipvt[kmax]; |
506 | 0 | ipvt[kmax] = k; |
507 | |
|
508 | 0 | L40: |
509 | | /* |
510 | | * compute the householder transformation to reduce the |
511 | | * j-th column of a to a multiple of the j-th unit vector. |
512 | | */ |
513 | 0 | jj = j + m*j; |
514 | 0 | ajnorm = enorm(m-j,&a[jj]); |
515 | 0 | if(ajnorm == zero) |
516 | 0 | goto L100; |
517 | 0 | if(a[jj] < zero) |
518 | 0 | ajnorm = -ajnorm; |
519 | 0 | ij = jj; |
520 | 0 | for( i=j; i<m; i++ ) |
521 | 0 | { |
522 | 0 | a[ij] /= ajnorm; |
523 | 0 | ij += 1; /* [i+m*j] */ |
524 | 0 | } |
525 | 0 | a[jj] += one; |
526 | | /* |
527 | | * apply the transformation to the remaining columns |
528 | | * and update the norms. |
529 | | */ |
530 | 0 | jp1 = j + 1; |
531 | 0 | if(jp1 < n ) |
532 | 0 | { |
533 | 0 | for( k=jp1; k<n; k++ ) |
534 | 0 | { |
535 | 0 | sum = zero; |
536 | 0 | ij = j + m*k; |
537 | 0 | jj = j + m*j; |
538 | 0 | for( i=j; i<m; i++ ) |
539 | 0 | { |
540 | 0 | sum += a[jj]*a[ij]; |
541 | 0 | ij += 1; /* [i+m*k] */ |
542 | 0 | jj += 1; /* [i+m*j] */ |
543 | 0 | } |
544 | 0 | temp = sum/a[j+m*j]; |
545 | 0 | ij = j + m*k; |
546 | 0 | jj = j + m*j; |
547 | 0 | for( i=j; i<m; i++ ) |
548 | 0 | { |
549 | 0 | a[ij] -= temp*a[jj]; |
550 | 0 | ij += 1; /* [i+m*k] */ |
551 | 0 | jj += 1; /* [i+m*j] */ |
552 | 0 | } |
553 | 0 | if( (pivot != 0) && (rdiag[k] != zero) ) |
554 | 0 | { |
555 | 0 | temp = a[j+m*k]/rdiag[k]; |
556 | 0 | temp = dmax1( zero, one-temp*temp ); |
557 | 0 | rdiag[k] *= std::sqrt(temp); |
558 | 0 | temp = rdiag[k]/wa[k]; |
559 | 0 | if( (p05*temp*temp) <= MACHEP) |
560 | 0 | { |
561 | 0 | rdiag[k] = enorm(m-j-1,&a[jp1+m*k]); |
562 | 0 | wa[k] = rdiag[k]; |
563 | 0 | } |
564 | 0 | } |
565 | 0 | } |
566 | 0 | } |
567 | |
|
568 | 0 | L100: |
569 | 0 | rdiag[j] = -ajnorm; |
570 | 0 | } |
571 | | /* |
572 | | * last card of subroutine qrfac. |
573 | | */ |
574 | 0 | } |
575 | | |
576 | | /************************qrsolv.c*************************/ |
577 | | |
578 | | |
579 | | void qrsolv(int n, |
580 | | Real* r, |
581 | | int ldr, |
582 | | const int* ipvt, |
583 | | const Real* diag, |
584 | | const Real* qtb, |
585 | | Real* x, |
586 | | Real* sdiag, |
587 | 0 | Real* wa) { |
588 | | /* |
589 | | * ********** |
590 | | * |
591 | | * subroutine qrsolv |
592 | | * |
593 | | * given an m by n matrix a, an n by n diagonal matrix d, |
594 | | * and an m-vector b, the problem is to determine an x which |
595 | | * solves the system |
596 | | * |
597 | | * a*x = b , d*x = 0 , |
598 | | * |
599 | | * in the least squares sense. |
600 | | * |
601 | | * this subroutine completes the solution of the problem |
602 | | * if it is provided with the necessary information from the |
603 | | * qr factorization, with column pivoting, of a. that is, if |
604 | | * a*p = q*r, where p is a permutation matrix, q has orthogonal |
605 | | * columns, and r is an upper triangular matrix with diagonal |
606 | | * elements of nonincreasing magnitude, then qrsolv expects |
607 | | * the full upper triangle of r, the permutation matrix p, |
608 | | * and the first n components of (q transpose)*b. the system |
609 | | * a*x = b, d*x = 0, is then equivalent to |
610 | | * |
611 | | * t t |
612 | | * r*z = q *b , p *d*p*z = 0 , |
613 | | * |
614 | | * where x = p*z. if this system does not have full rank, |
615 | | * then a least squares solution is obtained. on output qrsolv |
616 | | * also provides an upper triangular matrix s such that |
617 | | * |
618 | | * t t t |
619 | | * p *(a *a + d*d)*p = s *s . |
620 | | * |
621 | | * s is computed within qrsolv and may be of separate interest. |
622 | | * |
623 | | * the subroutine statement is |
624 | | * |
625 | | * subroutine qrsolv(n,r,ldr,ipvt,diag,qtb,x,sdiag,wa) |
626 | | * |
627 | | * where |
628 | | * |
629 | | * n is a positive integer input variable set to the order of r. |
630 | | * |
631 | | * r is an n by n array. on input the full upper triangle |
632 | | * must contain the full upper triangle of the matrix r. |
633 | | * on output the full upper triangle is unaltered, and the |
634 | | * strict lower triangle contains the strict upper triangle |
635 | | * (transposed) of the upper triangular matrix s. |
636 | | * |
637 | | * ldr is a positive integer input variable not less than n |
638 | | * which specifies the leading dimension of the array r. |
639 | | * |
640 | | * ipvt is an integer input array of length n which defines the |
641 | | * permutation matrix p such that a*p = q*r. column j of p |
642 | | * is column ipvt(j) of the identity matrix. |
643 | | * |
644 | | * diag is an input array of length n which must contain the |
645 | | * diagonal elements of the matrix d. |
646 | | * |
647 | | * qtb is an input array of length n which must contain the first |
648 | | * n elements of the vector (q transpose)*b. |
649 | | * |
650 | | * x is an output array of length n which contains the least |
651 | | * squares solution of the system a*x = b, d*x = 0. |
652 | | * |
653 | | * sdiag is an output array of length n which contains the |
654 | | * diagonal elements of the upper triangular matrix s. |
655 | | * |
656 | | * wa is a work array of length n. |
657 | | * |
658 | | * subprograms called |
659 | | * |
660 | | * fortran-supplied ... dabs,dsqrt |
661 | | * |
662 | | * argonne national laboratory. minpack project. march 1980. |
663 | | * burton s. garbow, kenneth e. hillstrom, jorge j. more |
664 | | * |
665 | | * ********** |
666 | | */ |
667 | 0 | int i, ij, ik, kk, j, jp1, k, kp1, l, nsing; |
668 | 0 | Real cos, cotan, qtbpj, sin, sum, tan, temp; |
669 | 0 | static double zero = 0.0; |
670 | 0 | static double p25 = 0.25; |
671 | 0 | static double p5 = 0.5; |
672 | | |
673 | | /* |
674 | | * copy r and (q transpose)*b to preserve input and initialize s. |
675 | | * in particular, save the diagonal elements of r in x. |
676 | | */ |
677 | 0 | kk = 0; |
678 | 0 | for (j = 0; j < n; j++) { |
679 | 0 | ij = kk; |
680 | 0 | ik = kk; |
681 | 0 | for (i = j; i < n; i++) { |
682 | 0 | r[ij] = r[ik]; |
683 | 0 | ij += 1; /* [i+ldr*j] */ |
684 | 0 | ik += ldr; /* [j+ldr*i] */ |
685 | 0 | } |
686 | 0 | x[j] = r[kk]; |
687 | 0 | wa[j] = qtb[j]; |
688 | 0 | kk += ldr+1; /* j+ldr*j */ |
689 | 0 | } |
690 | | /* |
691 | | * eliminate the diagonal matrix d using a givens rotation. |
692 | | */ |
693 | 0 | for( j=0; j<n; j++ ) |
694 | 0 | { |
695 | | /* |
696 | | * prepare the row of d to be eliminated, locating the |
697 | | * diagonal element using p from the qr factorization. |
698 | | */ |
699 | 0 | l = ipvt[j]; |
700 | 0 | if(diag[l] == zero) |
701 | 0 | goto L90; |
702 | 0 | for( k=j; k<n; k++ ) |
703 | 0 | sdiag[k] = zero; |
704 | 0 | sdiag[j] = diag[l]; |
705 | | /* |
706 | | * the transformations to eliminate the row of d |
707 | | * modify only a single element of (q transpose)*b |
708 | | * beyond the first n, which is initially zero. |
709 | | */ |
710 | 0 | qtbpj = zero; |
711 | 0 | for( k=j; k<n; k++ ) |
712 | 0 | { |
713 | | /* |
714 | | * determine a givens rotation which eliminates the |
715 | | * appropriate element in the current row of d. |
716 | | */ |
717 | 0 | if(sdiag[k] == zero) |
718 | 0 | continue; |
719 | 0 | kk = k + ldr * k; |
720 | 0 | if(std::fabs(r[kk]) < std::fabs(sdiag[k])) |
721 | 0 | { |
722 | 0 | cotan = r[kk]/sdiag[k]; |
723 | 0 | sin = p5/std::sqrt(p25+p25*cotan*cotan); |
724 | 0 | cos = sin*cotan; |
725 | 0 | } |
726 | 0 | else |
727 | 0 | { |
728 | 0 | tan = sdiag[k]/r[kk]; |
729 | 0 | cos = p5/std::sqrt(p25+p25*tan*tan); |
730 | 0 | sin = cos*tan; |
731 | 0 | } |
732 | | /* |
733 | | * compute the modified diagonal element of r and |
734 | | * the modified element of ((q transpose)*b,0). |
735 | | */ |
736 | 0 | r[kk] = cos*r[kk] + sin*sdiag[k]; |
737 | 0 | temp = cos*wa[k] + sin*qtbpj; |
738 | 0 | qtbpj = -sin*wa[k] + cos*qtbpj; |
739 | 0 | wa[k] = temp; |
740 | | /* |
741 | | * accumulate the tranformation in the row of s. |
742 | | */ |
743 | 0 | kp1 = k + 1; |
744 | 0 | if( n > kp1 ) |
745 | 0 | { |
746 | 0 | ik = kk + 1; |
747 | 0 | for( i=kp1; i<n; i++ ) |
748 | 0 | { |
749 | 0 | temp = cos*r[ik] + sin*sdiag[i]; |
750 | 0 | sdiag[i] = -sin*r[ik] + cos*sdiag[i]; |
751 | 0 | r[ik] = temp; |
752 | 0 | ik += 1; /* [i+ldr*k] */ |
753 | 0 | } |
754 | 0 | } |
755 | 0 | } |
756 | 0 | L90: |
757 | | /* |
758 | | * store the diagonal element of s and restore |
759 | | * the corresponding diagonal element of r. |
760 | | */ |
761 | 0 | kk = j + ldr*j; |
762 | 0 | sdiag[j] = r[kk]; |
763 | 0 | r[kk] = x[j]; |
764 | 0 | } |
765 | | /* |
766 | | * solve the triangular system for z. if the system is |
767 | | * singular, then obtain a least squares solution. |
768 | | */ |
769 | 0 | nsing = n; |
770 | 0 | for( j=0; j<n; j++ ) |
771 | 0 | { |
772 | 0 | if( (sdiag[j] == zero) && (nsing == n) ) |
773 | 0 | nsing = j; |
774 | 0 | if(nsing < n) |
775 | 0 | wa[j] = zero; |
776 | 0 | } |
777 | 0 | if(nsing < 1) |
778 | 0 | goto L150; |
779 | | |
780 | 0 | for( k=0; k<nsing; k++ ) |
781 | 0 | { |
782 | 0 | j = nsing - k - 1; |
783 | 0 | sum = zero; |
784 | 0 | jp1 = j + 1; |
785 | 0 | if(nsing > jp1) |
786 | 0 | { |
787 | 0 | ij = jp1 + ldr * j; |
788 | 0 | for( i=jp1; i<nsing; i++ ) |
789 | 0 | { |
790 | 0 | sum += r[ij]*wa[i]; |
791 | 0 | ij += 1; /* [i+ldr*j] */ |
792 | 0 | } |
793 | 0 | } |
794 | 0 | wa[j] = (wa[j] - sum)/sdiag[j]; |
795 | 0 | } |
796 | 0 | L150: |
797 | | /* |
798 | | * permute the components of z back to components of x. |
799 | | */ |
800 | 0 | for( j=0; j<n; j++ ) |
801 | 0 | { |
802 | 0 | l = ipvt[j]; |
803 | 0 | x[l] = wa[j]; |
804 | 0 | } |
805 | | /* |
806 | | * last card of subroutine qrsolv. |
807 | | */ |
808 | 0 | } |
809 | | |
810 | | /************************lmpar.c*************************/ |
811 | | |
812 | | |
813 | | void lmpar(int n, |
814 | | Real* r, |
815 | | int ldr, |
816 | | int* ipvt, |
817 | | const Real* diag, |
818 | | Real* qtb, |
819 | | Real delta, |
820 | | Real* par, |
821 | | Real* x, |
822 | | Real* sdiag, |
823 | | Real* wa1, |
824 | 0 | Real* wa2) { |
825 | | /* ********** |
826 | | * |
827 | | * subroutine lmpar |
828 | | * |
829 | | * given an m by n matrix a, an n by n nonsingular diagonal |
830 | | * matrix d, an m-vector b, and a positive number delta, |
831 | | * the problem is to determine a value for the parameter |
832 | | * par such that if x solves the system |
833 | | * |
834 | | * a*x = b , sqrt(par)*d*x = 0 , |
835 | | * |
836 | | * in the least squares sense, and dxnorm is the euclidean |
837 | | * norm of d*x, then either par is zero and |
838 | | * |
839 | | * (dxnorm-delta) .le. 0.1*delta , |
840 | | * |
841 | | * or par is positive and |
842 | | * |
843 | | * abs(dxnorm-delta) .le. 0.1*delta . |
844 | | * |
845 | | * this subroutine completes the solution of the problem |
846 | | * if it is provided with the necessary information from the |
847 | | * qr factorization, with column pivoting, of a. that is, if |
848 | | * a*p = q*r, where p is a permutation matrix, q has orthogonal |
849 | | * columns, and r is an upper triangular matrix with diagonal |
850 | | * elements of nonincreasing magnitude, then lmpar expects |
851 | | * the full upper triangle of r, the permutation matrix p, |
852 | | * and the first n components of (q transpose)*b. on output |
853 | | * lmpar also provides an upper triangular matrix s such that |
854 | | * |
855 | | * t t t |
856 | | * p *(a *a + par*d*d)*p = s *s . |
857 | | * |
858 | | * s is employed within lmpar and may be of separate interest. |
859 | | * |
860 | | * only a few iterations are generally needed for convergence |
861 | | * of the algorithm. if, however, the limit of 10 iterations |
862 | | * is reached, then the output par will contain the best |
863 | | * value obtained so far. |
864 | | * |
865 | | * the subroutine statement is |
866 | | * |
867 | | * subroutine lmpar(n,r,ldr,ipvt,diag,qtb,delta,par,x,sdiag, |
868 | | * wa1,wa2) |
869 | | * |
870 | | * where |
871 | | * |
872 | | * n is a positive integer input variable set to the order of r. |
873 | | * |
874 | | * r is an n by n array. on input the full upper triangle |
875 | | * must contain the full upper triangle of the matrix r. |
876 | | * on output the full upper triangle is unaltered, and the |
877 | | * strict lower triangle contains the strict upper triangle |
878 | | * (transposed) of the upper triangular matrix s. |
879 | | * |
880 | | * ldr is a positive integer input variable not less than n |
881 | | * which specifies the leading dimension of the array r. |
882 | | * |
883 | | * ipvt is an integer input array of length n which defines the |
884 | | * permutation matrix p such that a*p = q*r. column j of p |
885 | | * is column ipvt(j) of the identity matrix. |
886 | | * |
887 | | * diag is an input array of length n which must contain the |
888 | | * diagonal elements of the matrix d. |
889 | | * |
890 | | * qtb is an input array of length n which must contain the first |
891 | | * n elements of the vector (q transpose)*b. |
892 | | * |
893 | | * delta is a positive input variable which specifies an upper |
894 | | * bound on the euclidean norm of d*x. |
895 | | * |
896 | | * par is a nonnegative variable. on input par contains an |
897 | | * initial estimate of the levenberg-marquardt parameter. |
898 | | * on output par contains the final estimate. |
899 | | * |
900 | | * x is an output array of length n which contains the least |
901 | | * squares solution of the system a*x = b, sqrt(par)*d*x = 0, |
902 | | * for the output par. |
903 | | * |
904 | | * sdiag is an output array of length n which contains the |
905 | | * diagonal elements of the upper triangular matrix s. |
906 | | * |
907 | | * wa1 and wa2 are work arrays of length n. |
908 | | * |
909 | | * subprograms called |
910 | | * |
911 | | * minpack-supplied ... dpmpar,enorm,qrsolv |
912 | | * |
913 | | * fortran-supplied ... dabs,dmax1,dmin1,dsqrt |
914 | | * |
915 | | * argonne national laboratory. minpack project. march 1980. |
916 | | * burton s. garbow, kenneth e. hillstrom, jorge j. more |
917 | | * |
918 | | * ********** |
919 | | */ |
920 | 0 | int i, iter, ij, jj, j, jm1, jp1, k, l, nsing; |
921 | 0 | Real dxnorm, fp, gnorm, parc, parl, paru; |
922 | 0 | Real sum, temp; |
923 | 0 | static double zero = 0.0; |
924 | 0 | static double p1 = 0.1; |
925 | 0 | static double p001 = 0.001; |
926 | | |
927 | | |
928 | | /* |
929 | | * compute and store in x the gauss-newton direction. if the |
930 | | * jacobian is rank-deficient, obtain a least squares solution. |
931 | | */ |
932 | 0 | nsing = n; |
933 | 0 | jj = 0; |
934 | 0 | for (j = 0; j < n; j++) { |
935 | 0 | wa1[j] = qtb[j]; |
936 | 0 | if ((r[jj] == zero) && (nsing == n)) |
937 | 0 | nsing = j; |
938 | 0 | if (nsing < n) |
939 | 0 | wa1[j] = zero; |
940 | 0 | jj += ldr + 1; /* [j+ldr*j] */ |
941 | 0 | } |
942 | 0 | if(nsing >= 1) |
943 | 0 | { |
944 | 0 | for( k=0; k<nsing; k++ ) |
945 | 0 | { |
946 | 0 | j = nsing - k - 1; |
947 | 0 | wa1[j] = wa1[j]/r[j+ldr*j]; |
948 | 0 | temp = wa1[j]; |
949 | 0 | jm1 = j - 1; |
950 | 0 | if(jm1 >= 0) |
951 | 0 | { |
952 | 0 | ij = ldr * j; |
953 | 0 | for( i=0; i<=jm1; i++ ) |
954 | 0 | { |
955 | 0 | wa1[i] -= r[ij]*temp; |
956 | 0 | ij += 1; |
957 | 0 | } |
958 | 0 | } |
959 | 0 | } |
960 | 0 | } |
961 | |
|
962 | 0 | for( j=0; j<n; j++ ) |
963 | 0 | { |
964 | 0 | l = ipvt[j]; |
965 | 0 | x[l] = wa1[j]; |
966 | 0 | } |
967 | | /* |
968 | | * initialize the iteration counter. |
969 | | * evaluate the function at the origin, and test |
970 | | * for acceptance of the gauss-newton direction. |
971 | | */ |
972 | 0 | iter = 0; |
973 | 0 | for( j=0; j<n; j++ ) |
974 | 0 | wa2[j] = diag[j]*x[j]; |
975 | 0 | dxnorm = enorm(n,wa2); |
976 | 0 | fp = dxnorm - delta; |
977 | 0 | if(fp <= p1*delta) |
978 | 0 | { |
979 | 0 | goto L220; |
980 | 0 | } |
981 | | /* |
982 | | * if the jacobian is not rank deficient, the newton |
983 | | * step provides a lower bound, parl, for the zero of |
984 | | * the function. otherwise set this bound to zero. |
985 | | */ |
986 | 0 | parl = zero; |
987 | 0 | if(nsing >= n) |
988 | 0 | { |
989 | 0 | for( j=0; j<n; j++ ) |
990 | 0 | { |
991 | 0 | l = ipvt[j]; |
992 | 0 | wa1[j] = diag[l]*(wa2[l]/dxnorm); |
993 | 0 | } |
994 | 0 | jj = 0; |
995 | 0 | for( j=0; j<n; j++ ) |
996 | 0 | { |
997 | 0 | sum = zero; |
998 | 0 | jm1 = j - 1; |
999 | 0 | if(jm1 >= 0) |
1000 | 0 | { |
1001 | 0 | ij = jj; |
1002 | 0 | for( i=0; i<=jm1; i++ ) |
1003 | 0 | { |
1004 | 0 | sum += r[ij]*wa1[i]; |
1005 | 0 | ij += 1; |
1006 | 0 | } |
1007 | 0 | } |
1008 | 0 | wa1[j] = (wa1[j] - sum)/r[j+ldr*j]; |
1009 | 0 | jj += ldr; /* [i+ldr*j] */ |
1010 | 0 | } |
1011 | 0 | temp = enorm(n,wa1); |
1012 | 0 | parl = ((fp/delta)/temp)/temp; |
1013 | 0 | } |
1014 | | /* |
1015 | | * calculate an upper bound, paru, for the zero of the function. |
1016 | | */ |
1017 | 0 | jj = 0; |
1018 | 0 | for( j=0; j<n; j++ ) |
1019 | 0 | { |
1020 | 0 | sum = zero; |
1021 | 0 | ij = jj; |
1022 | 0 | for( i=0; i<=j; i++ ) |
1023 | 0 | { |
1024 | 0 | sum += r[ij]*qtb[i]; |
1025 | 0 | ij += 1; |
1026 | 0 | } |
1027 | 0 | l = ipvt[j]; |
1028 | 0 | wa1[j] = sum/diag[l]; |
1029 | 0 | jj += ldr; /* [i+ldr*j] */ |
1030 | 0 | } |
1031 | 0 | gnorm = enorm(n,wa1); |
1032 | 0 | paru = gnorm/delta; |
1033 | 0 | if(paru == zero) |
1034 | 0 | paru = DWARF/dmin1(delta,p1); |
1035 | | /* |
1036 | | * if the input par lies outside of the interval (parl,paru), |
1037 | | * set par to the closer endpoint. |
1038 | | */ |
1039 | 0 | *par = dmax1( *par,parl); |
1040 | 0 | *par = dmin1( *par,paru); |
1041 | 0 | if( *par == zero) |
1042 | 0 | *par = gnorm/dxnorm; |
1043 | | /* |
1044 | | * beginning of an iteration. |
1045 | | */ |
1046 | 0 | L150: |
1047 | 0 | iter += 1; |
1048 | | /* |
1049 | | * evaluate the function at the current value of par. |
1050 | | */ |
1051 | 0 | if( *par == zero) |
1052 | 0 | *par = dmax1(DWARF,p001*paru); |
1053 | 0 | temp = std::sqrt( *par ); |
1054 | 0 | for( j=0; j<n; j++ ) |
1055 | 0 | wa1[j] = temp*diag[j]; |
1056 | 0 | qrsolv(n,r,ldr,ipvt,wa1,qtb,x,sdiag,wa2); |
1057 | 0 | for( j=0; j<n; j++ ) |
1058 | 0 | wa2[j] = diag[j]*x[j]; |
1059 | 0 | dxnorm = enorm(n,wa2); |
1060 | 0 | temp = fp; |
1061 | 0 | fp = dxnorm - delta; |
1062 | | /* |
1063 | | * if the function is small enough, accept the current value |
1064 | | * of par. also test for the exceptional cases where parl |
1065 | | * is zero or the number of iterations has reached 10. |
1066 | | */ |
1067 | 0 | if( (std::fabs(fp) <= p1*delta) |
1068 | 0 | || ((parl == zero) && (fp <= temp) && (temp < zero)) |
1069 | 0 | || (iter == 10) ) |
1070 | 0 | goto L220; |
1071 | | /* |
1072 | | * compute the newton correction. |
1073 | | */ |
1074 | 0 | for( j=0; j<n; j++ ) |
1075 | 0 | { |
1076 | 0 | l = ipvt[j]; |
1077 | 0 | wa1[j] = diag[l]*(wa2[l]/dxnorm); |
1078 | 0 | } |
1079 | 0 | jj = 0; |
1080 | 0 | for( j=0; j<n; j++ ) |
1081 | 0 | { |
1082 | 0 | wa1[j] = wa1[j]/sdiag[j]; |
1083 | 0 | temp = wa1[j]; |
1084 | 0 | jp1 = j + 1; |
1085 | 0 | if(jp1 < n) |
1086 | 0 | { |
1087 | 0 | ij = jp1 + jj; |
1088 | 0 | for( i=jp1; i<n; i++ ) |
1089 | 0 | { |
1090 | 0 | wa1[i] -= r[ij]*temp; |
1091 | 0 | ij += 1; /* [i+ldr*j] */ |
1092 | 0 | } |
1093 | 0 | } |
1094 | 0 | jj += ldr; /* ldr*j */ |
1095 | 0 | } |
1096 | 0 | temp = enorm(n,wa1); |
1097 | 0 | parc = ((fp/delta)/temp)/temp; |
1098 | | /* |
1099 | | * depending on the sign of the function, update parl or paru. |
1100 | | */ |
1101 | 0 | if(fp > zero) |
1102 | 0 | parl = dmax1(parl, *par); |
1103 | 0 | if(fp < zero) |
1104 | 0 | paru = dmin1(paru, *par); |
1105 | | /* |
1106 | | * compute an improved estimate for par. |
1107 | | */ |
1108 | 0 | *par = dmax1(parl, *par + parc); |
1109 | | /* |
1110 | | * end of an iteration. |
1111 | | */ |
1112 | 0 | goto L150; |
1113 | | |
1114 | 0 | L220: |
1115 | | /* |
1116 | | * termination. |
1117 | | */ |
1118 | 0 | if(iter == 0) |
1119 | 0 | *par = zero; |
1120 | | /* |
1121 | | * last card of subroutine lmpar. |
1122 | | */ |
1123 | 0 | } |
1124 | | |
1125 | | /*********************** lmdif.c ****************************/ |
1126 | | |
1127 | | |
1128 | | |
1129 | | |
1130 | | void lmdif(int m,int n,Real* x,Real* fvec,Real ftol, |
1131 | | Real xtol,Real gtol,int maxfev,Real epsfcn, |
1132 | | Real* diag, int mode, Real factor, |
1133 | | int nprint, int* info,int* nfev,Real* fjac, |
1134 | | int ldfjac,int* ipvt,Real* qtf, |
1135 | | Real* wa1,Real* wa2,Real* wa3,Real* wa4, |
1136 | | const QuantLib::MINPACK::LmdifCostFunction& fcn, |
1137 | | const QuantLib::MINPACK::LmdifCostFunction& jacFcn) |
1138 | 0 | { |
1139 | | /* |
1140 | | * ********** |
1141 | | * |
1142 | | * subroutine lmdif |
1143 | | * |
1144 | | * the purpose of lmdif is to minimize the sum of the squares of |
1145 | | * m nonlinear functions in n variables by a modification of |
1146 | | * the levenberg-marquardt algorithm. the user must provide a |
1147 | | * subroutine which calculates the functions. the jacobian is |
1148 | | * then calculated by a forward-difference approximation. |
1149 | | * |
1150 | | * the subroutine statement is |
1151 | | * |
1152 | | * subroutine lmdif(fcn,m,n,x,fvec,ftol,xtol,gtol,maxfev,epsfcn, |
1153 | | * diag,mode,factor,nprint,info,nfev,fjac, |
1154 | | * ldfjac,ipvt,qtf,wa1,wa2,wa3,wa4) |
1155 | | * |
1156 | | * where |
1157 | | * |
1158 | | * fcn is the name of the user-supplied subroutine which |
1159 | | * calculates the functions. fcn must be declared |
1160 | | * in an external statement in the user calling |
1161 | | * program, and should be written as follows. |
1162 | | * |
1163 | | * subroutine fcn(m,n,x,fvec,iflag) |
1164 | | * integer m,n,iflag |
1165 | | * double precision x(n),fvec(m) |
1166 | | * ---------- |
1167 | | * calculate the functions at x and |
1168 | | * return this vector in fvec. |
1169 | | * ---------- |
1170 | | * return |
1171 | | * end |
1172 | | * |
1173 | | * the value of iflag should not be changed by fcn unless |
1174 | | * the user wants to terminate execution of lmdif. |
1175 | | * in this case set iflag to a negative integer. |
1176 | | * |
1177 | | * m is a positive integer input variable set to the number |
1178 | | * of functions. |
1179 | | * |
1180 | | * n is a positive integer input variable set to the number |
1181 | | * of variables. n must not exceed m. |
1182 | | * |
1183 | | * x is an array of length n. on input x must contain |
1184 | | * an initial estimate of the solution vector. on output x |
1185 | | * contains the final estimate of the solution vector. |
1186 | | * |
1187 | | * fvec is an output array of length m which contains |
1188 | | * the functions evaluated at the output x. |
1189 | | * |
1190 | | * ftol is a nonnegative input variable. termination |
1191 | | * occurs when both the actual and predicted relative |
1192 | | * reductions in the sum of squares are at most ftol. |
1193 | | * therefore, ftol measures the relative error desired |
1194 | | * in the sum of squares. |
1195 | | * |
1196 | | * xtol is a nonnegative input variable. termination |
1197 | | * occurs when the relative error between two consecutive |
1198 | | * iterates is at most xtol. therefore, xtol measures the |
1199 | | * relative error desired in the approximate solution. |
1200 | | * |
1201 | | * gtol is a nonnegative input variable. termination |
1202 | | * occurs when the cosine of the angle between fvec and |
1203 | | * any column of the jacobian is at most gtol in absolute |
1204 | | * value. therefore, gtol measures the orthogonality |
1205 | | * desired between the function vector and the columns |
1206 | | * of the jacobian. |
1207 | | * |
1208 | | * maxfev is a positive integer input variable. termination |
1209 | | * occurs when the number of calls to fcn is at least |
1210 | | * maxfev by the end of an iteration. |
1211 | | * |
1212 | | * epsfcn is an input variable used in determining a suitable |
1213 | | * step length for the forward-difference approximation. this |
1214 | | * approximation assumes that the relative errors in the |
1215 | | * functions are of the order of epsfcn. if epsfcn is less |
1216 | | * than the machine precision, it is assumed that the relative |
1217 | | * errors in the functions are of the order of the machine |
1218 | | * precision. |
1219 | | * |
1220 | | * diag is an array of length n. if mode = 1 (see |
1221 | | * below), diag is internally set. if mode = 2, diag |
1222 | | * must contain positive entries that serve as |
1223 | | * multiplicative scale factors for the variables. |
1224 | | * |
1225 | | * mode is an integer input variable. if mode = 1, the |
1226 | | * variables will be scaled internally. if mode = 2, |
1227 | | * the scaling is specified by the input diag. other |
1228 | | * values of mode are equivalent to mode = 1. |
1229 | | * |
1230 | | * factor is a positive input variable used in determining the |
1231 | | * initial step bound. this bound is set to the product of |
1232 | | * factor and the euclidean norm of diag*x if nonzero, or else |
1233 | | * to factor itself. in most cases factor should lie in the |
1234 | | * interval (.1,100.). 100. is a generally recommended value. |
1235 | | * |
1236 | | * nprint is an integer input variable that enables controlled |
1237 | | * printing of iterates if it is positive. in this case, |
1238 | | * fcn is called with iflag = 0 at the beginning of the first |
1239 | | * iteration and every nprint iterations thereafter and |
1240 | | * immediately prior to return, with x and fvec available |
1241 | | * for printing. if nprint is not positive, no special calls |
1242 | | * of fcn with iflag = 0 are made. |
1243 | | * |
1244 | | * info is an integer output variable. if the user has |
1245 | | * terminated execution, info is set to the (negative) |
1246 | | * value of iflag. see description of fcn. otherwise, |
1247 | | * info is set as follows. |
1248 | | * |
1249 | | * info = 0 improper input parameters. |
1250 | | * |
1251 | | * info = 1 both actual and predicted relative reductions |
1252 | | * in the sum of squares are at most ftol. |
1253 | | * |
1254 | | * info = 2 relative error between two consecutive iterates |
1255 | | * is at most xtol. |
1256 | | * |
1257 | | * info = 3 conditions for info = 1 and info = 2 both hold. |
1258 | | * |
1259 | | * info = 4 the cosine of the angle between fvec and any |
1260 | | * column of the jacobian is at most gtol in |
1261 | | * absolute value. |
1262 | | * |
1263 | | * info = 5 number of calls to fcn has reached or |
1264 | | * exceeded maxfev. |
1265 | | * |
1266 | | * info = 6 ftol is too small. no further reduction in |
1267 | | * the sum of squares is possible. |
1268 | | * |
1269 | | * info = 7 xtol is too small. no further improvement in |
1270 | | * the approximate solution x is possible. |
1271 | | * |
1272 | | * info = 8 gtol is too small. fvec is orthogonal to the |
1273 | | * columns of the jacobian to machine precision. |
1274 | | * |
1275 | | * nfev is an integer output variable set to the number of |
1276 | | * calls to fcn. |
1277 | | * |
1278 | | * fjac is an output m by n array. the upper n by n submatrix |
1279 | | * of fjac contains an upper triangular matrix r with |
1280 | | * diagonal elements of nonincreasing magnitude such that |
1281 | | * |
1282 | | * t t t |
1283 | | * p *(jac *jac)*p = r *r, |
1284 | | * |
1285 | | * where p is a permutation matrix and jac is the final |
1286 | | * calculated jacobian. column j of p is column ipvt(j) |
1287 | | * (see below) of the identity matrix. the lower trapezoidal |
1288 | | * part of fjac contains information generated during |
1289 | | * the computation of r. |
1290 | | * |
1291 | | * ldfjac is a positive integer input variable not less than m |
1292 | | * which specifies the leading dimension of the array fjac. |
1293 | | * |
1294 | | * ipvt is an integer output array of length n. ipvt |
1295 | | * defines a permutation matrix p such that jac*p = q*r, |
1296 | | * where jac is the final calculated jacobian, q is |
1297 | | * orthogonal (not stored), and r is upper triangular |
1298 | | * with diagonal elements of nonincreasing magnitude. |
1299 | | * column j of p is column ipvt(j) of the identity matrix. |
1300 | | * |
1301 | | * qtf is an output array of length n which contains |
1302 | | * the first n elements of the vector (q transpose)*fvec. |
1303 | | * |
1304 | | * wa1, wa2, and wa3 are work arrays of length n. |
1305 | | * |
1306 | | * wa4 is a work array of length m. |
1307 | | * |
1308 | | * subprograms called |
1309 | | * |
1310 | | * user-supplied ...... fcn, jacFcn |
1311 | | * |
1312 | | * minpack-supplied ... dpmpar,enorm,fdjac2,lmpar,qrfac |
1313 | | * |
1314 | | * fortran-supplied ... dabs,dmax1,dmin1,dsqrt,mod |
1315 | | * |
1316 | | * argonne national laboratory. minpack project. march 1980. |
1317 | | * burton s. garbow, kenneth e. hillstrom, jorge j. more |
1318 | | * |
1319 | | * ********** |
1320 | | */ |
1321 | 0 | int i,iflag,ij,jj,iter,j,l; |
1322 | 0 | Real actred,delta=0,dirder,fnorm,fnorm1,gnorm; |
1323 | 0 | Real par,pnorm,prered,ratio; |
1324 | 0 | Real sum,temp,temp1,temp2,temp3,xnorm=0; |
1325 | 0 | static double one = 1.0; |
1326 | 0 | static double p1 = 0.1; |
1327 | 0 | static double p5 = 0.5; |
1328 | 0 | static double p25 = 0.25; |
1329 | 0 | static double p75 = 0.75; |
1330 | 0 | static double p0001 = 1.0e-4; |
1331 | 0 | static double zero = 0.0; |
1332 | |
|
1333 | 0 | *info = 0; |
1334 | 0 | iflag = 0; |
1335 | 0 | *nfev = 0; |
1336 | | /* |
1337 | | * check the input parameters for errors. |
1338 | | */ |
1339 | 0 | if( (n <= 0) || (m < n) || (ldfjac < m) || (ftol < zero) |
1340 | 0 | || (xtol < zero) || (gtol < zero) || (maxfev <= 0) |
1341 | 0 | || (factor <= zero) ) |
1342 | 0 | goto L300; |
1343 | | |
1344 | 0 | if( mode == 2 ) |
1345 | 0 | { /* scaling by diag[] */ |
1346 | 0 | for( j=0; j<n; j++ ) |
1347 | 0 | { |
1348 | 0 | if( diag[j] <= 0.0 ) |
1349 | 0 | goto L300; |
1350 | 0 | } |
1351 | 0 | } |
1352 | | /* |
1353 | | * evaluate the function at the starting point |
1354 | | * and calculate its norm. |
1355 | | */ |
1356 | 0 | iflag = 1; |
1357 | 0 | fcn(m,n,x,fvec,&iflag); |
1358 | 0 | *nfev = 1; |
1359 | 0 | if(iflag < 0) |
1360 | 0 | goto L300; |
1361 | 0 | fnorm = enorm(m,fvec); |
1362 | | /* |
1363 | | * initialize levenberg-marquardt parameter and iteration counter. |
1364 | | */ |
1365 | 0 | par = zero; |
1366 | 0 | iter = 1; |
1367 | | /* |
1368 | | * beginning of the outer loop. |
1369 | | */ |
1370 | |
|
1371 | 0 | L30: |
1372 | | |
1373 | | /* |
1374 | | * calculate the jacobian matrix. |
1375 | | */ |
1376 | 0 | iflag = 2; |
1377 | 0 | if (!jacFcn) |
1378 | 0 | fdjac2(m,n,x,fvec,fjac,ldfjac,&iflag,epsfcn,wa4, fcn); |
1379 | 0 | else // use user supplied jacobian calculation |
1380 | 0 | jacFcn(m,n,x,fjac,&iflag); |
1381 | 0 | *nfev += n; |
1382 | 0 | if(iflag < 0) |
1383 | 0 | goto L300; |
1384 | | /* |
1385 | | * if requested, call fcn to enable printing of iterates. |
1386 | | */ |
1387 | 0 | if( nprint > 0 ) |
1388 | 0 | { |
1389 | 0 | iflag = 0; |
1390 | 0 | if(mod(iter-1,nprint) == 0) |
1391 | 0 | { |
1392 | 0 | fcn(m,n,x,fvec,&iflag); |
1393 | 0 | if(iflag < 0) |
1394 | 0 | goto L300; |
1395 | 0 | } |
1396 | 0 | } |
1397 | | /* |
1398 | | * compute the qr factorization of the jacobian. |
1399 | | */ |
1400 | 0 | qrfac(m,n,fjac,ldfjac,1,ipvt,n,wa1,wa2,wa3); |
1401 | | /* |
1402 | | * on the first iteration and if mode is 1, scale according |
1403 | | * to the norms of the columns of the initial jacobian. |
1404 | | */ |
1405 | 0 | if(iter == 1) |
1406 | 0 | { |
1407 | 0 | if(mode != 2) |
1408 | 0 | { |
1409 | 0 | for( j=0; j<n; j++ ) |
1410 | 0 | { |
1411 | 0 | diag[j] = wa2[j]; |
1412 | 0 | if( wa2[j] == zero ) |
1413 | 0 | diag[j] = one; |
1414 | 0 | } |
1415 | 0 | } |
1416 | | |
1417 | | /* |
1418 | | * on the first iteration, calculate the norm of the scaled x |
1419 | | * and initialize the step bound delta. |
1420 | | */ |
1421 | 0 | for( j=0; j<n; j++ ) |
1422 | 0 | wa3[j] = diag[j] * x[j]; |
1423 | |
|
1424 | 0 | xnorm = enorm(n,wa3); |
1425 | 0 | delta = factor*xnorm; |
1426 | 0 | if(delta == zero) |
1427 | 0 | delta = factor; |
1428 | 0 | } |
1429 | | |
1430 | | /* |
1431 | | * form (q transpose)*fvec and store the first n components in |
1432 | | * qtf. |
1433 | | */ |
1434 | 0 | for( i=0; i<m; i++ ) |
1435 | 0 | wa4[i] = fvec[i]; |
1436 | 0 | jj = 0; |
1437 | 0 | for( j=0; j<n; j++ ) |
1438 | 0 | { |
1439 | 0 | temp3 = fjac[jj]; |
1440 | 0 | if(temp3 != zero) |
1441 | 0 | { |
1442 | 0 | sum = zero; |
1443 | 0 | ij = jj; |
1444 | 0 | for( i=j; i<m; i++ ) |
1445 | 0 | { |
1446 | 0 | sum += fjac[ij] * wa4[i]; |
1447 | 0 | ij += 1; /* fjac[i+m*j] */ |
1448 | 0 | } |
1449 | 0 | temp = -sum / temp3; |
1450 | 0 | ij = jj; |
1451 | 0 | for( i=j; i<m; i++ ) |
1452 | 0 | { |
1453 | 0 | wa4[i] += fjac[ij] * temp; |
1454 | 0 | ij += 1; /* fjac[i+m*j] */ |
1455 | 0 | } |
1456 | 0 | } |
1457 | 0 | fjac[jj] = wa1[j]; |
1458 | 0 | jj += m+1; /* fjac[j+m*j] */ |
1459 | 0 | qtf[j] = wa4[j]; |
1460 | 0 | } |
1461 | | |
1462 | | /* |
1463 | | * compute the norm of the scaled gradient. |
1464 | | */ |
1465 | 0 | gnorm = zero; |
1466 | 0 | if(fnorm != zero) |
1467 | 0 | { |
1468 | 0 | jj = 0; |
1469 | 0 | for( j=0; j<n; j++ ) |
1470 | 0 | { |
1471 | 0 | l = ipvt[j]; |
1472 | 0 | if(wa2[l] != zero) |
1473 | 0 | { |
1474 | 0 | sum = zero; |
1475 | 0 | ij = jj; |
1476 | 0 | for( i=0; i<=j; i++ ) |
1477 | 0 | { |
1478 | 0 | sum += fjac[ij]*(qtf[i]/fnorm); |
1479 | 0 | ij += 1; /* fjac[i+m*j] */ |
1480 | 0 | } |
1481 | 0 | gnorm = dmax1(gnorm,std::fabs(sum/wa2[l])); |
1482 | 0 | } |
1483 | 0 | jj += m; |
1484 | 0 | } |
1485 | 0 | } |
1486 | | |
1487 | | /* |
1488 | | * test for convergence of the gradient norm. |
1489 | | */ |
1490 | 0 | if(gnorm <= gtol) |
1491 | 0 | *info = 4; |
1492 | 0 | if( *info != 0) |
1493 | 0 | goto L300; |
1494 | | /* |
1495 | | * rescale if necessary. |
1496 | | */ |
1497 | 0 | if(mode != 2) |
1498 | 0 | { |
1499 | 0 | for( j=0; j<n; j++ ) |
1500 | 0 | diag[j] = dmax1(diag[j],wa2[j]); |
1501 | 0 | } |
1502 | | |
1503 | | /* |
1504 | | * beginning of the inner loop. |
1505 | | */ |
1506 | 0 | L200: |
1507 | | /* |
1508 | | * determine the levenberg-marquardt parameter. |
1509 | | */ |
1510 | 0 | lmpar(n,fjac,ldfjac,ipvt,diag,qtf,delta,&par,wa1,wa2,wa3,wa4); |
1511 | | /* |
1512 | | * store the direction p and x + p. calculate the norm of p. |
1513 | | */ |
1514 | 0 | for( j=0; j<n; j++ ) |
1515 | 0 | { |
1516 | 0 | wa1[j] = -wa1[j]; |
1517 | 0 | wa2[j] = x[j] + wa1[j]; |
1518 | 0 | wa3[j] = diag[j]*wa1[j]; |
1519 | 0 | } |
1520 | 0 | pnorm = enorm(n,wa3); |
1521 | | /* |
1522 | | * on the first iteration, adjust the initial step bound. |
1523 | | */ |
1524 | 0 | if(iter == 1) |
1525 | 0 | delta = dmin1(delta,pnorm); |
1526 | | /* |
1527 | | * evaluate the function at x + p and calculate its norm. |
1528 | | */ |
1529 | 0 | iflag = 1; |
1530 | 0 | fcn(m,n,wa2,wa4,&iflag); |
1531 | 0 | *nfev += 1; |
1532 | 0 | if(iflag < 0) |
1533 | 0 | goto L300; |
1534 | 0 | fnorm1 = enorm(m,wa4); |
1535 | | /* |
1536 | | * compute the scaled actual reduction. |
1537 | | */ |
1538 | 0 | actred = -one; |
1539 | 0 | if( (p1*fnorm1) < fnorm) |
1540 | 0 | { |
1541 | 0 | temp = fnorm1/fnorm; |
1542 | 0 | actred = one - temp * temp; |
1543 | 0 | } |
1544 | | /* |
1545 | | * compute the scaled predicted reduction and |
1546 | | * the scaled directional derivative. |
1547 | | */ |
1548 | 0 | jj = 0; |
1549 | 0 | for( j=0; j<n; j++ ) |
1550 | 0 | { |
1551 | 0 | wa3[j] = zero; |
1552 | 0 | l = ipvt[j]; |
1553 | 0 | temp = wa1[l]; |
1554 | 0 | ij = jj; |
1555 | 0 | for( i=0; i<=j; i++ ) |
1556 | 0 | { |
1557 | 0 | wa3[i] += fjac[ij]*temp; |
1558 | 0 | ij += 1; /* fjac[i+m*j] */ |
1559 | 0 | } |
1560 | 0 | jj += m; |
1561 | 0 | } |
1562 | 0 | temp1 = enorm(n,wa3)/fnorm; |
1563 | 0 | temp2 = (std::sqrt(par)*pnorm)/fnorm; |
1564 | 0 | prered = temp1*temp1 + (temp2*temp2)/p5; |
1565 | 0 | dirder = -(temp1*temp1 + temp2*temp2); |
1566 | | /* |
1567 | | * compute the ratio of the actual to the predicted |
1568 | | * reduction. |
1569 | | */ |
1570 | 0 | ratio = zero; |
1571 | 0 | if(prered != zero) |
1572 | 0 | ratio = actred/prered; |
1573 | | /* |
1574 | | * update the step bound. |
1575 | | */ |
1576 | 0 | if(ratio <= p25) |
1577 | 0 | { |
1578 | 0 | if(actred >= zero) |
1579 | 0 | temp = p5; |
1580 | 0 | else |
1581 | 0 | temp = p5*dirder/(dirder + p5*actred); |
1582 | 0 | if( ((p1*fnorm1) >= fnorm) |
1583 | 0 | || (temp < p1) ) |
1584 | 0 | temp = p1; |
1585 | 0 | delta = temp*dmin1(delta,pnorm/p1); |
1586 | 0 | par = par/temp; |
1587 | 0 | } |
1588 | 0 | else |
1589 | 0 | { |
1590 | 0 | if( (par == zero) || (ratio >= p75) ) |
1591 | 0 | { |
1592 | 0 | delta = pnorm/p5; |
1593 | 0 | par = p5*par; |
1594 | 0 | } |
1595 | 0 | } |
1596 | | /* |
1597 | | * test for successful iteration. |
1598 | | */ |
1599 | 0 | if(ratio >= p0001) |
1600 | 0 | { |
1601 | | /* |
1602 | | * successful iteration. update x, fvec, and their norms. |
1603 | | */ |
1604 | 0 | for( j=0; j<n; j++ ) |
1605 | 0 | { |
1606 | 0 | x[j] = wa2[j]; |
1607 | 0 | wa2[j] = diag[j]*x[j]; |
1608 | 0 | } |
1609 | 0 | for( i=0; i<m; i++ ) |
1610 | 0 | fvec[i] = wa4[i]; |
1611 | 0 | xnorm = enorm(n,wa2); |
1612 | 0 | fnorm = fnorm1; |
1613 | 0 | iter += 1; |
1614 | 0 | } |
1615 | | /* |
1616 | | * tests for convergence. |
1617 | | */ |
1618 | 0 | if( (std::fabs(actred) <= ftol) |
1619 | 0 | && (prered <= ftol) |
1620 | 0 | && (p5*ratio <= one) ) |
1621 | 0 | *info = 1; |
1622 | 0 | if(delta <= xtol*xnorm) |
1623 | 0 | *info = 2; |
1624 | 0 | if( (std::fabs(actred) <= ftol) |
1625 | 0 | && (prered <= ftol) |
1626 | 0 | && (p5*ratio <= one) |
1627 | 0 | && ( *info == 2) ) |
1628 | 0 | *info = 3; |
1629 | 0 | if( *info != 0) |
1630 | 0 | goto L300; |
1631 | | /* |
1632 | | * tests for termination and stringent tolerances. |
1633 | | */ |
1634 | 0 | if( *nfev >= maxfev) |
1635 | 0 | *info = 5; |
1636 | 0 | if( (std::fabs(actred) <= MACHEP) |
1637 | 0 | && (prered <= MACHEP) |
1638 | 0 | && (p5*ratio <= one) ) |
1639 | 0 | *info = 6; |
1640 | 0 | if(delta <= MACHEP*xnorm) |
1641 | 0 | *info = 7; |
1642 | 0 | if(gnorm <= MACHEP) |
1643 | 0 | *info = 8; |
1644 | 0 | if( *info != 0) |
1645 | 0 | goto L300; |
1646 | | /* |
1647 | | * end of the inner loop. repeat if iteration unsuccessful. |
1648 | | */ |
1649 | 0 | if(ratio < p0001) |
1650 | 0 | goto L200; |
1651 | | /* |
1652 | | * end of the outer loop. |
1653 | | */ |
1654 | 0 | goto L30; |
1655 | | |
1656 | 0 | L300: |
1657 | | /* |
1658 | | * termination, either normal or user imposed. |
1659 | | */ |
1660 | 0 | if(iflag < 0) |
1661 | 0 | *info = iflag; |
1662 | 0 | iflag = 0; |
1663 | 0 | if(nprint > 0) |
1664 | 0 | fcn(m,n,x,fvec,&iflag); |
1665 | | /* |
1666 | | last card of subroutine lmdif. |
1667 | | */ |
1668 | 0 | } |
1669 | | } |
1670 | | |
1671 | | /************************fdjac2.c*************************/ |
1672 | | |