From f2ff0d3903f4f0e0945a049f0f60f380bc3450d4 Mon Sep 17 00:00:00 2001
From: Thomas Capricelli <orzel@freehackers.org>
Date: Fri, 21 Aug 2009 03:13:42 +0200
Subject: [PATCH] merging ei_hybrd() and hybrd_template()

---
 .../Eigen/src/NonLinear/MathFunctions.h       |  48 ------
 unsupported/Eigen/src/NonLinear/hybrd.h       | 156 +++++++++---------
 2 files changed, 79 insertions(+), 125 deletions(-)
diff --git a/unsupported/Eigen/src/NonLinear/MathFunctions.h b/unsupported/Eigen/src/NonLinear/MathFunctions.h
index 3a18392f0..ec5c7f57e 100644
--- a/unsupported/Eigen/src/NonLinear/MathFunctions.h
+++ b/unsupported/Eigen/src/NonLinear/MathFunctions.h
@@ -25,54 +25,6 @@
 #ifndef EIGEN_NONLINEAR_MATHFUNCTIONS_H
 #define EIGEN_NONLINEAR_MATHFUNCTIONS_H
 
-template<typename Functor, typename Scalar>
-int ei_hybrd(
-        Matrix< Scalar, Dynamic, 1 >  &x,
-        Matrix< Scalar, Dynamic, 1 >  &fvec,
-        int &nfev,
-        Matrix< Scalar, Dynamic, Dynamic > &fjac,
-        Matrix< Scalar, Dynamic, 1 >  &R,
-        Matrix< Scalar, Dynamic, 1 >  &qtf,
-        Matrix< Scalar, Dynamic, 1 >  &diag,
-        int mode=1,
-        int nb_of_subdiagonals = -1,
-        int nb_of_superdiagonals = -1,
-        int maxfev = 2000,
-        Scalar factor = Scalar(100.),
-        Scalar xtol = ei_sqrt(epsilon<Scalar>()),
-        Scalar epsfcn = Scalar(0.),
-        int nprint=0
-        )
-{
-    int n = x.size();
-    int lr = (n*(n+1))/2;
-    Matrix< Scalar, Dynamic, 1 > wa1(n), wa2(n), wa3(n), wa4(n);
-
-
-    if (nb_of_subdiagonals<0) nb_of_subdiagonals = n-1;
-    if (nb_of_superdiagonals<0) nb_of_superdiagonals = n-1;
-    fvec.resize(n);
-    qtf.resize(n);
-    R.resize(lr);
-    int ldfjac = n;
-    fjac.resize(ldfjac, n);
-    return hybrd_template<Scalar>(
-            Functor::f, 0,
-            n, x.data(), fvec.data(),
-            xtol, maxfev,
-            nb_of_subdiagonals, nb_of_superdiagonals,
-            epsfcn, 
-            diag.data(), mode, 
-            factor,
-            nprint, 
-            nfev,
-            fjac.data(), ldfjac,
-            R.data(), lr,
-            qtf.data(),
-            wa1.data(), wa2.data(), wa3.data(), wa4.data()
-    );
-}
-
 
 template<typename Functor, typename Scalar>
 int ei_hybrj(
diff --git a/unsupported/Eigen/src/NonLinear/hybrd.h b/unsupported/Eigen/src/NonLinear/hybrd.h
index f7770c4c4..1a61f57de 100644
--- a/unsupported/Eigen/src/NonLinear/hybrd.h
+++ b/unsupported/Eigen/src/NonLinear/hybrd.h
@@ -1,18 +1,38 @@
 
-template<typename Scalar>
-int hybrd_template(minpack_func_nn fcn, void *p, int n, Scalar *x, Scalar *
-        fvec, Scalar xtol, int maxfev, int ml, int mu, 
-        Scalar epsfcn, Scalar *diag, int mode, Scalar factor, int nprint, int &nfev, Scalar *
-        fjac, int ldfjac, Scalar *r__, int lr, Scalar *qtf, 
-        Scalar *wa1, Scalar *wa2, Scalar *wa3, Scalar *wa4)
+template<typename Functor, typename Scalar>
+int ei_hybrd(
+        Matrix< Scalar, Dynamic, 1 >  &x,
+        Matrix< Scalar, Dynamic, 1 >  &fvec,
+        int &nfev,
+        Matrix< Scalar, Dynamic, Dynamic > &fjac,
+        Matrix< Scalar, Dynamic, 1 >  &R,
+        Matrix< Scalar, Dynamic, 1 >  &qtf,
+        Matrix< Scalar, Dynamic, 1 >  &diag,
+        int mode=1,
+        int nb_of_subdiagonals = -1,
+        int nb_of_superdiagonals = -1,
+        int maxfev = 2000,
+        Scalar factor = Scalar(100.),
+        Scalar xtol = ei_sqrt(epsilon<Scalar>()),
+        Scalar epsfcn = Scalar(0.),
+        int nprint=0
+        )
 {
-    /* Initialized data */
+    const int n = x.size();
+    int lr = (n*(n+1))/2;
+    Matrix< Scalar, Dynamic, 1 > wa1(n), wa2(n), wa3(n), wa4(n);
 
-    /* System generated locals */
-    int fjac_offset;
+
+    if (nb_of_subdiagonals<0) nb_of_subdiagonals = n-1;
+    if (nb_of_superdiagonals<0) nb_of_superdiagonals = n-1;
+    fvec.resize(n);
+    qtf.resize(n);
+    R.resize(lr);
+    int ldfjac = n;
+    fjac.resize(ldfjac, n);
 
     /* Local variables */
-    int i, j, l, jm1, iwa[1];
+    int i, j, l, iwa[1];
     Scalar sum;
     int sing;
     int iter;
@@ -29,19 +49,6 @@ int hybrd_template(minpack_func_nn fcn, void *p, int n, Scalar *x, Scalar *
     Scalar actred, prered;
     int info;
 
-    /* Parameter adjustments */
-    --wa4;
-    --wa3;
-    --wa2;
-    --wa1;
-    --qtf;
-    --diag;
-    --fvec;
-    --x;
-    fjac_offset = 1 + ldfjac;
-    fjac -= fjac_offset;
-    --r__;
-
     /* Function Body */
 
     info = 0;
@@ -50,14 +57,14 @@ int hybrd_template(minpack_func_nn fcn, void *p, int n, Scalar *x, Scalar *
 
     /*     check the input parameters for errors. */
 
-    if (n <= 0 || xtol < 0. || maxfev <= 0 || ml < 0 || mu < 0 ||
+    if (n <= 0 || xtol < 0. || maxfev <= 0 || nb_of_subdiagonals < 0 || nb_of_superdiagonals < 0 ||
             factor <= 0. || ldfjac < n || lr < n * (n + 1) / 2) {
         goto L300;
     }
     if (mode != 2) {
         goto L20;
     }
-    for (j = 1; j <= n; ++j) {
+    for (j = 0; j < n; ++j) {
         if (diag[j] <= 0.) {
             goto L300;
         }
@@ -68,18 +75,18 @@ L20:
     /*     evaluate the function at the starting point */
     /*     and calculate its norm. */
 
-    iflag = (*fcn)(p, n, &x[1], &fvec[1], 1);
+    iflag = Functor::f(0, n, x.data(), fvec.data(), 1);
     nfev = 1;
     if (iflag < 0) {
         goto L300;
     }
-    fnorm = ei_enorm<Scalar>(n, &fvec[1]);
+    fnorm = fvec.stableNorm();
 
     /*     determine the number of calls to fcn needed to compute */
     /*     the jacobian matrix. */
 
     /* Computing MIN */
-    msum = min(ml + mu + 1, n);
+    msum = min(nb_of_subdiagonals + nb_of_superdiagonals + 1, n);
 
     /*     initialize iteration counter and monitors. */
 
@@ -96,8 +103,8 @@ L30:
 
     /*        calculate the jacobian matrix. */
 
-    iflag = fdjac1(fcn, p, n, &x[1], &fvec[1], &fjac[fjac_offset], ldfjac,
-            ml, mu, epsfcn, &wa1[1], &wa2[1]);
+    iflag = fdjac1(Functor::f, 0, n, x.data(), fvec.data(), fjac.data(), ldfjac,
+            nb_of_subdiagonals, nb_of_superdiagonals, epsfcn, wa1.data(), wa2.data());
     nfev += msum;
     if (iflag < 0) {
         goto L300;
@@ -105,8 +112,7 @@ L30:
 
     /*        compute the qr factorization of the jacobian. */
 
-    qrfac(n, n, &fjac[fjac_offset], ldfjac, FALSE_, iwa, 1, &wa1[1], &
-            wa2[1], &wa3[1]);
+    qrfac(n, n, fjac.data(), ldfjac, FALSE_, iwa, 1, wa1.data(), wa2.data(), wa3.data());
 
     /*        on the first iteration and if mode is 1, scale according */
     /*        to the norms of the columns of the initial jacobian. */
@@ -117,7 +123,7 @@ L30:
     if (mode == 2) {
         goto L50;
     }
-    for (j = 1; j <= n; ++j) {
+    for (j = 0; j < n; ++j) {
         diag[j] = wa2[j];
         if (wa2[j] == 0.) {
             diag[j] = 1.;
@@ -129,11 +135,11 @@ L50:
     /*        on the first iteration, calculate the norm of the scaled x */
     /*        and initialize the step bound delta. */
 
-    for (j = 1; j <= n; ++j) {
+    for (j = 0; j < n; ++j) {
         wa3[j] = diag[j] * x[j];
         /* L60: */
     }
-    xnorm = ei_enorm<Scalar>(n, &wa3[1]);
+    xnorm = wa3.stableNorm();
     delta = factor * xnorm;
     if (delta == 0.) {
         delta = factor;
@@ -142,22 +148,22 @@ L70:
 
     /*        form (q transpose)*fvec and store in qtf. */
 
-    for (i = 1; i <= n; ++i) {
+    for (i = 0; i < n; ++i) {
         qtf[i] = fvec[i];
         /* L80: */
     }
-    for (j = 1; j <= n; ++j) {
-        if (fjac[j + j * ldfjac] == 0.) {
+    for (j = 0; j < n; ++j) {
+        if (fjac(j,j) == 0.) {
             goto L110;
         }
         sum = 0.;
-        for (i = j; i <= n; ++i) {
-            sum += fjac[i + j * ldfjac] * qtf[i];
+        for (i = j; i < n; ++i) {
+            sum += fjac(i,j) * qtf[i];
             /* L90: */
         }
-        temp = -sum / fjac[j + j * ldfjac];
-        for (i = j; i <= n; ++i) {
-            qtf[i] += fjac[i + j * ldfjac] * temp;
+        temp = -sum / fjac(j,j);
+        for (i = j; i < n; ++i) {
+            qtf[i] += fjac(i,j) * temp;
             /* L100: */
         }
 L110:
@@ -168,19 +174,16 @@ L110:
     /*        copy the triangular factor of the qr factorization into r. */
 
     sing = FALSE_;
-    for (j = 1; j <= n; ++j) {
+    for (j = 0; j < n; ++j) {
         l = j;
-        jm1 = j - 1;
-        if (jm1 < 1) {
-            goto L140;
+        if (j) {
+            for (i = 0; i < j; ++i) {
+                R[l] = fjac(i,j);
+                l = l + n - i -1;
+                /* L130: */
+            }
         }
-        for (i = 1; i <= jm1; ++i) {
-            r__[l] = fjac[i + j * ldfjac];
-            l = l + n - i;
-            /* L130: */
-        }
-L140:
-        r__[l] = wa1[j];
+        R[l] = wa1[j];
         if (wa1[j] == 0.) {
             sing = TRUE_;
         }
@@ -189,7 +192,7 @@ L140:
 
     /*        accumulate the orthogonal factor in fjac. */
 
-    qform(n, n, &fjac[fjac_offset], ldfjac, &wa1[1]);
+    qform(n, n, fjac.data(), ldfjac, wa1.data());
 
     /*        rescale if necessary. */
 
@@ -197,7 +200,7 @@ L140:
         goto L170;
     }
     /* Computing MAX */
-    for (j = 1; j <= n; ++j)
+    for (j = 0; j < n; ++j)
         diag[j] = max(diag[j], wa2[j]);
 L170:
 
@@ -212,7 +215,7 @@ L180:
     }
     iflag = 0;
     if ((iter - 1) % nprint == 0) {
-        iflag = (*fcn)(p, n, &x[1], &fvec[1], 0);
+        iflag = Functor::f(0, n, x.data(), fvec.data(), 0);
     }
     if (iflag < 0) {
         goto L300;
@@ -221,18 +224,17 @@ L190:
 
     /*           determine the direction p. */
 
-    dogleg(n, &r__[1], lr, &diag[1], &qtf[1], delta, &wa1[1], &wa2[1], &wa3[
-            1]);
+    dogleg(n, R.data(), lr, diag.data(), qtf.data(), delta, wa1.data(), wa2.data(), wa3.data());
 
     /*           store the direction p and x + p. calculate the norm of p. */
 
-    for (j = 1; j <= n; ++j) {
+    for (j = 0; j < n; ++j) {
         wa1[j] = -wa1[j];
         wa2[j] = x[j] + wa1[j];
         wa3[j] = diag[j] * wa1[j];
         /* L200: */
     }
-    pnorm = ei_enorm<Scalar>(n, &wa3[1]);
+    pnorm = wa3.stableNorm();
 
     /*           on the first iteration, adjust the initial step bound. */
 
@@ -242,12 +244,12 @@ L190:
 
     /*           evaluate the function at x + p and calculate its norm. */
 
-    iflag = (*fcn)(p, n, &wa2[1], &wa4[1], 1);
+    iflag = Functor::f(0, n, wa2.data(), wa4.data(), 1);
     ++(nfev);
     if (iflag < 0) {
         goto L300;
     }
-    fnorm1 = ei_enorm<Scalar>(n, &wa4[1]);
+    fnorm1 = wa4.stableNorm();
 
     /*           compute the scaled actual reduction. */
 
@@ -257,18 +259,18 @@ L190:
 
     /*           compute the scaled predicted reduction. */
 
-    l = 1;
-    for (i = 1; i <= n; ++i) {
+    l = 0;
+    for (i = 0; i < n; ++i) {
         sum = 0.;
-        for (j = i; j <= n; ++j) {
-            sum += r__[l] * wa1[j];
+        for (j = i; j < n; ++j) {
+            sum += R[l] * wa1[j];
             ++l;
             /* L210: */
         }
         wa3[i] = qtf[i] + sum;
         /* L220: */
     }
-    temp = ei_enorm<Scalar>(n, &wa3[1]);
+    temp = wa3.stableNorm();
     prered = 0.;
     if (temp < fnorm) /* Computing 2nd power */
         prered = 1. - ei_abs2(temp / fnorm);
@@ -308,13 +310,13 @@ L240:
 
     /*           successful iteration. update x, fvec, and their norms. */
 
-    for (j = 1; j <= n; ++j) {
+    for (j = 0; j < n; ++j) {
         x[j] = wa2[j];
         wa2[j] = diag[j] * x[j];
         fvec[j] = wa4[j];
         /* L250: */
     }
-    xnorm = ei_enorm<Scalar>(n, &wa2[1]);
+    temp = wa2.stableNorm();
     fnorm = fnorm1;
     ++iter;
 L260:
@@ -365,10 +367,10 @@ L260:
     /*           calculate the rank one modification to the jacobian */
     /*           and update qtf if necessary. */
 
-    for (j = 1; j <= n; ++j) {
+    for (j = 0; j < n; ++j) {
         sum = 0.;
-        for (i = 1; i <= n; ++i) {
-            sum += fjac[i + j * ldfjac] * wa4[i];
+        for (i = 0; i < n; ++i) {
+            sum += fjac(i,j) * wa4[i];
             /* L270: */
         }
         wa2[j] = (sum - wa3[j]) / pnorm;
@@ -381,9 +383,9 @@ L260:
 
     /*           compute the qr factorization of the updated jacobian. */
 
-    r1updt(n, n, &r__[1], lr, &wa1[1], &wa2[1], &wa3[1], &sing);
-    r1mpyq(n, n, &fjac[fjac_offset], ldfjac, &wa2[1], &wa3[1]);
-    r1mpyq(1, n, &qtf[1], 1, &wa2[1], &wa3[1]);
+    r1updt(n, n, R.data(), lr, wa1.data(), wa2.data(), wa3.data(), &sing);
+    r1mpyq(n, n, fjac.data(), ldfjac, wa2.data(), wa3.data());
+    r1mpyq(1, n, qtf.data(), 1, wa2.data(), wa3.data());
 
     /*           end of the inner loop. */
 
@@ -402,7 +404,7 @@ L300:
         info = iflag;
     }
     if (nprint > 0) {
-        (*fcn)(p, n, &x[1], &fvec[1], 0);
+        Functor::f(0, n, x.data(), fvec.data(), 0);
     }
     return info;