From 525da6a464c897d0fe4e401a65851bcd7567fc5a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 16 Jul 2009 14:20:36 +0200 Subject: [PATCH] bugfix in blueNorm --- Eigen/src/Core/Dot.h | 39 +++--------- Eigen/src/Core/NumTraits.h | 8 +-- bench/bench_norm.cpp | 126 +++++++++++++++++++++++-------------- 3 files changed, 89 insertions(+), 84 deletions(-) diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index c5f2e8505..d97f5837f 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -304,29 +304,6 @@ MatrixBase::stableNorm() const return this->cwise().abs().redux(ei_scalar_hypot_op()); } -/** \internal Computes ibeta^iexp by binary expansion of iexp, - * exact if ibeta is the machine base */ -template inline T bexp(int ibeta, int iexp) -{ - T tbeta = T(ibeta); - T res = 1.0; - int n = iexp; - if (n<0) - { - n = - n; - tbeta = 1.0/tbeta; - } - for(;;) - { - if ((n % 2)==0) - res = res * tbeta; - n = n/2; - if (n==0) return res; - tbeta = tbeta*tbeta; - } - return res; -} - /** \returns the \em l2 norm of \c *this using the Blue's algorithm. * A Portable Fortran Program to Find the Euclidean Norm of a Vector, * ACM TOMS, Vol 4, Issue 1, 1978. @@ -337,7 +314,7 @@ template inline typename NumTraits::Scalar>::Real MatrixBase::blueNorm() const { - static int nmax; + static int nmax = -1; static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr; int n; Scalar ax, abig, amed, asml; @@ -355,8 +332,8 @@ MatrixBase::blueNorm() const // are used. For any specific computer, each of the assignment // statements can be replaced nbig = std::numeric_limits::max(); // largest integer - ibeta = NumTraits::Base; // base for floating-point numbers - it = NumTraits::Mantissa; // number of base-beta digits in mantissa + ibeta = std::numeric_limits::radix; //NumTraits::Base; // base for floating-point numbers + it = std::numeric_limits::digits; //NumTraits::Mantissa; // number of base-beta digits in mantissa iemin = std::numeric_limits::min_exponent; // minimum exponent iemax = std::numeric_limits::max_exponent; // maximum exponent rbig = std::numeric_limits::max(); // largest floating-point number @@ -368,17 +345,17 @@ MatrixBase::blueNorm() const ei_assert(false && "the algorithm cannot be guaranteed on this computer"); } iexp = -((1-iemin)/2); - b1 = bexp(ibeta, iexp); // lower boundary of midrange + b1 = std::pow(ibeta, iexp); // lower boundary of midrange iexp = (iemax + 1 - it)/2; - b2 = bexp(ibeta,iexp); // upper boundary of midrange + b2 = std::pow(ibeta,iexp); // upper boundary of midrange iexp = (2-iemin)/2; - s1m = bexp(ibeta,iexp); // scaling factor for lower range + s1m = std::pow(ibeta,iexp); // scaling factor for lower range iexp = - ((iemax+it)/2); - s2m = bexp(ibeta,iexp); // scaling factor for upper range + s2m = std::pow(ibeta,iexp); // scaling factor for upper range overfl = rbig*s2m; // overfow boundary for abig - eps = bexp(ibeta, 1-it); + eps = std::pow(ibeta, 1-it); relerr = ei_sqrt(eps); // tolerance for neglecting asml abig = 1.0/eps - 1.0; if (Scalar(nbig)>abig) nmax = abig; // largest safe n diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index dec07a036..24afe54c5 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -70,9 +70,7 @@ template<> struct NumTraits HasFloatingPoint = 1, ReadCost = 1, AddCost = 1, - MulCost = 1, - Base = 2, - Mantissa = 23 + MulCost = 1 }; }; @@ -85,9 +83,7 @@ template<> struct NumTraits HasFloatingPoint = 1, ReadCost = 1, AddCost = 1, - MulCost = 1, - Base = 2, - Mantissa = 52 + MulCost = 1 }; }; diff --git a/bench/bench_norm.cpp b/bench/bench_norm.cpp index 76c8c574d..e06d06417 100644 --- a/bench/bench_norm.cpp +++ b/bench/bench_norm.cpp @@ -1,3 +1,4 @@ +#include #include #include "BenchTimer.h" using namespace Eigen; @@ -58,18 +59,23 @@ EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) return ei_sqrt(v(0)); } +#ifdef EIGEN_VECTORIZE Packet4f ei_plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); } Packet2d ei_plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); } Packet4f ei_pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); } Packet2d ei_pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); } +#endif template EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) { + #ifndef EIGEN_VECTORIZE + return v.blueNorm(); + #else typedef typename T::Scalar Scalar; - static int nmax; + static int nmax = 0; static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr; int n; @@ -79,8 +85,8 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) Scalar abig, eps; nbig = std::numeric_limits::max(); // largest integer - ibeta = NumTraits::Base; // base for floating-point numbers - it = NumTraits::Mantissa; // number of base-beta digits in mantissa + ibeta = std::numeric_limits::radix; //NumTraits::Base; // base for floating-point numbers + it = std::numeric_limits::digits; //NumTraits::Mantissa; // number of base-beta digits in mantissa iemin = std::numeric_limits::min_exponent; // minimum exponent iemax = std::numeric_limits::max_exponent; // maximum exponent rbig = std::numeric_limits::max(); // largest floating-point number @@ -92,23 +98,23 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) ei_assert(false && "the algorithm cannot be guaranteed on this computer"); } iexp = -((1-iemin)/2); - b1 = bexp(ibeta, iexp); // lower boundary of midrange + b1 = std::pow(ibeta, iexp); // lower boundary of midrange iexp = (iemax + 1 - it)/2; - b2 = bexp(ibeta,iexp); // upper boundary of midrange + b2 = std::pow(ibeta,iexp); // upper boundary of midrange iexp = (2-iemin)/2; - s1m = bexp(ibeta,iexp); // scaling factor for lower range + s1m = std::pow(ibeta,iexp); // scaling factor for lower range iexp = - ((iemax+it)/2); - s2m = bexp(ibeta,iexp); // scaling factor for upper range + s2m = std::pow(ibeta,iexp); // scaling factor for upper range overfl = rbig*s2m; // overfow boundary for abig - eps = bexp(ibeta, 1-it); + eps = std::pow(ibeta, 1-it); relerr = ei_sqrt(eps); // tolerance for neglecting asml abig = 1.0/eps - 1.0; if (Scalar(nbig)>abig) nmax = abig; // largest safe n else nmax = nbig; } - + typedef typename ei_packet_traits::type Packet; const int ps = ei_packet_traits::size; Packet pasml = ei_pset1(Scalar(0)); @@ -173,6 +179,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) return abig; else return abig * ei_sqrt(Scalar(1) + ei_abs2(asml/abig)); + #endif } #define BENCH_PERF(NRM) { \ @@ -196,7 +203,7 @@ void check_accuracy(double basef, double based, int s) double yd = based * ei_abs(ei_random()); VectorXf vf = VectorXf::Ones(s) * yf; VectorXd vd = VectorXd::Ones(s) * yd; - + std::cout << "reference\t" << ei_sqrt(double(s))*yf << "\t" << ei_sqrt(double(s))*yd << "\n"; std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n"; std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n"; @@ -205,55 +212,80 @@ void check_accuracy(double basef, double based, int s) std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n"; } -int main(int argc, char** argv) +void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) +{ + VectorXf vf(s); + VectorXd vd(s); + for (int i=0; i()) * std::pow(double(10), ei_random(ef0,ef1)); + vd[i] = ei_abs(ei_random()) * std::pow(double(10), ei_random(ed0,ed1)); + } + + //std::cout << "reference\t" << ei_sqrt(double(s))*yf << "\t" << ei_sqrt(double(s))*yd << "\n"; + std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast()) << "\t" << sqsumNorm(vd.cast()) << "\n"; + std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast()) << "\t" << hypotNorm(vd.cast()) << "\n"; + std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" << blueNorm(vd.cast()) << "\n"; + std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" << blueNorm(vd.cast()) << "\n"; + std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast()) << "\t" << lapackNorm(vd.cast()) << "\n"; +} + +int main(int argc, char** argv) { int tries = 5; int iters = 100000; double y = 1.1345743233455785456788e12 * ei_random(); VectorXf v = VectorXf::Ones(1024) * y; - -// std::cerr << "Performance (out of cache):\n"; -// { -// int iters = 1; -// VectorXf vf = VectorXf::Ones(1024*1024*32) * y; -// VectorXd vd = VectorXd::Ones(1024*1024*32) * y; -// BENCH_PERF(sqsumNorm); -// BENCH_PERF(blueNorm); -// BENCH_PERF(pblueNorm); -// BENCH_PERF(lapackNorm); -// BENCH_PERF(hypotNorm); -// } -// -// std::cerr << "\nPerformance (in cache):\n"; -// { -// int iters = 100000; -// VectorXf vf = VectorXf::Ones(512) * y; -// VectorXd vd = VectorXd::Ones(512) * y; -// BENCH_PERF(sqsumNorm); -// BENCH_PERF(blueNorm); -// BENCH_PERF(pblueNorm); -// BENCH_PERF(lapackNorm); -// BENCH_PERF(hypotNorm); -// } - + + std::cerr << "Performance (out of cache):\n"; + { + int iters = 1; + VectorXf vf = VectorXf::Ones(1024*1024*32) * y; + VectorXd vd = VectorXd::Ones(1024*1024*32) * y; + BENCH_PERF(sqsumNorm); + BENCH_PERF(blueNorm); + BENCH_PERF(pblueNorm); + BENCH_PERF(lapackNorm); + BENCH_PERF(hypotNorm); + } + + std::cerr << "\nPerformance (in cache):\n"; + { + int iters = 100000; + VectorXf vf = VectorXf::Ones(512) * y; + VectorXd vd = VectorXd::Ones(512) * y; + BENCH_PERF(sqsumNorm); + BENCH_PERF(blueNorm); + BENCH_PERF(pblueNorm); + BENCH_PERF(lapackNorm); + BENCH_PERF(hypotNorm); + } + int s = 10000; - double basef_ok = 1.1345743233455785456788e12; - double based_ok = 1.1345743233455785456788e32; - - double basef_under = 1.1345743233455785456788e-23; - double based_under = 1.1345743233455785456788e-180; - + double basef_ok = 1.1345743233455785456788e15; + double based_ok = 1.1345743233455785456788e95; + + double basef_under = 1.1345743233455785456788e-27; + double based_under = 1.1345743233455785456788e-315; + double basef_over = 1.1345743233455785456788e+27; - double based_over = 1.1345743233455785456788e+185; - + double based_over = 1.1345743233455785456788e+302; + std::cout.precision(20); - + std::cerr << "\nNo under/overflow:\n"; check_accuracy(basef_ok, based_ok, s); - + std::cerr << "\nUnderflow:\n"; check_accuracy(basef_under, based_under, 1); - + std::cerr << "\nOverflow:\n"; check_accuracy(basef_over, based_over, s); + + std::cerr << "\nVarying (over):\n"; + for (int k=0; k<5; ++k) + { + check_accuracy_var(20,27,190,302,s); + std::cout << "\n"; + } }