eigen/test/stable_norm.cpp
Antonio Sanchez bde6741641 Improved std::complex sqrt and rsqrt.
Replaces `std::sqrt` with `complex_sqrt` for all platforms (previously
`complex_sqrt` was only used for CUDA and MSVC), and implements
custom `complex_rsqrt`.

Also introduces `numext::rsqrt` to simplify implementation, and modified
`numext::hypot` to adhere to IEEE IEC 6059 for special cases.

The `complex_sqrt` and `complex_rsqrt` implementations were found to be
significantly faster than `std::sqrt<std::complex<T>>` and
`1/numext::sqrt<std::complex<T>>`.

Benchmark file attached.
```
GCC 10, Intel Xeon, x86_64:
---------------------------------------------------------------------------
Benchmark                                 Time             CPU   Iterations
---------------------------------------------------------------------------
BM_Sqrt<std::complex<float>>           9.21 ns         9.21 ns     73225448
BM_StdSqrt<std::complex<float>>        17.1 ns         17.1 ns     40966545
BM_Sqrt<std::complex<double>>          8.53 ns         8.53 ns     81111062
BM_StdSqrt<std::complex<double>>       21.5 ns         21.5 ns     32757248
BM_Rsqrt<std::complex<float>>          10.3 ns         10.3 ns     68047474
BM_DivSqrt<std::complex<float>>        16.3 ns         16.3 ns     42770127
BM_Rsqrt<std::complex<double>>         11.3 ns         11.3 ns     61322028
BM_DivSqrt<std::complex<double>>       16.5 ns         16.5 ns     42200711

Clang 11, Intel Xeon, x86_64:
---------------------------------------------------------------------------
Benchmark                                 Time             CPU   Iterations
---------------------------------------------------------------------------
BM_Sqrt<std::complex<float>>           7.46 ns         7.45 ns     90742042
BM_StdSqrt<std::complex<float>>        16.6 ns         16.6 ns     42369878
BM_Sqrt<std::complex<double>>          8.49 ns         8.49 ns     81629030
BM_StdSqrt<std::complex<double>>       21.8 ns         21.7 ns     31809588
BM_Rsqrt<std::complex<float>>          8.39 ns         8.39 ns     82933666
BM_DivSqrt<std::complex<float>>        14.4 ns         14.4 ns     48638676
BM_Rsqrt<std::complex<double>>         9.83 ns         9.82 ns     70068956
BM_DivSqrt<std::complex<double>>       15.7 ns         15.7 ns     44487798

Clang 9, Pixel 2, aarch64:
---------------------------------------------------------------------------
Benchmark                                 Time             CPU   Iterations
---------------------------------------------------------------------------
BM_Sqrt<std::complex<float>>           24.2 ns         24.1 ns     28616031
BM_StdSqrt<std::complex<float>>         104 ns          103 ns      6826926
BM_Sqrt<std::complex<double>>          31.8 ns         31.8 ns     22157591
BM_StdSqrt<std::complex<double>>        128 ns          128 ns      5437375
BM_Rsqrt<std::complex<float>>          31.9 ns         31.8 ns     22384383
BM_DivSqrt<std::complex<float>>        99.2 ns         98.9 ns      7250438
BM_Rsqrt<std::complex<double>>         46.0 ns         45.8 ns     15338689
BM_DivSqrt<std::complex<double>>        119 ns          119 ns      5898944
```
2021-01-17 08:50:57 -08:00

245 lines
10 KiB
C++

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
template<typename T> EIGEN_DONT_INLINE T copy(const T& x)
{
return x;
}
template<typename MatrixType> void stable_norm(const MatrixType& m)
{
/* this test covers the following files:
StableNorm.h
*/
using std::sqrt;
using std::abs;
typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
bool complex_real_product_ok = true;
// Check the basic machine-dependent constants.
{
int ibeta, it, iemin, iemax;
ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
VERIFY( (!(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5) || (it<=4 && ibeta <= 3 ) || it<2))
&& "the stable norm algorithm cannot be guaranteed on this computer");
Scalar inf = std::numeric_limits<RealScalar>::infinity();
if(NumTraits<Scalar>::IsComplex && (numext::isnan)(inf*RealScalar(1)) )
{
complex_real_product_ok = false;
static bool first = true;
if(first)
std::cerr << "WARNING: compiler mess up complex*real product, " << inf << " * " << 1.0 << " = " << inf*RealScalar(1) << std::endl;
first = false;
}
}
Index rows = m.rows();
Index cols = m.cols();
// get a non-zero random factor
Scalar factor = internal::random<Scalar>();
while(numext::abs2(factor)<RealScalar(1e-4))
factor = internal::random<Scalar>();
Scalar big = factor * ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
factor = internal::random<Scalar>();
while(numext::abs2(factor)<RealScalar(1e-4))
factor = internal::random<Scalar>();
Scalar small = factor * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4));
Scalar one(1);
MatrixType vzero = MatrixType::Zero(rows, cols),
vrand = MatrixType::Random(rows, cols),
vbig(rows, cols),
vsmall(rows,cols);
vbig.fill(big);
vsmall.fill(small);
VERIFY_IS_MUCH_SMALLER_THAN(vzero.norm(), static_cast<RealScalar>(1));
VERIFY_IS_APPROX(vrand.stableNorm(), vrand.norm());
VERIFY_IS_APPROX(vrand.blueNorm(), vrand.norm());
VERIFY_IS_APPROX(vrand.hypotNorm(), vrand.norm());
// test with expressions as input
VERIFY_IS_APPROX((one*vrand).stableNorm(), vrand.norm());
VERIFY_IS_APPROX((one*vrand).blueNorm(), vrand.norm());
VERIFY_IS_APPROX((one*vrand).hypotNorm(), vrand.norm());
VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).stableNorm(), vrand.norm());
VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).blueNorm(), vrand.norm());
VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).hypotNorm(), vrand.norm());
RealScalar size = static_cast<RealScalar>(m.size());
// test numext::isfinite
VERIFY(!(numext::isfinite)( std::numeric_limits<RealScalar>::infinity()));
VERIFY(!(numext::isfinite)(sqrt(-abs(big))));
// test overflow
VERIFY((numext::isfinite)(sqrt(size)*abs(big)));
VERIFY_IS_NOT_APPROX(sqrt(copy(vbig.squaredNorm())), abs(sqrt(size)*big)); // here the default norm must fail
VERIFY_IS_APPROX(vbig.stableNorm(), sqrt(size)*abs(big));
VERIFY_IS_APPROX(vbig.blueNorm(), sqrt(size)*abs(big));
VERIFY_IS_APPROX(vbig.hypotNorm(), sqrt(size)*abs(big));
// test underflow
VERIFY((numext::isfinite)(sqrt(size)*abs(small)));
VERIFY_IS_NOT_APPROX(sqrt(copy(vsmall.squaredNorm())), abs(sqrt(size)*small)); // here the default norm must fail
VERIFY_IS_APPROX(vsmall.stableNorm(), sqrt(size)*abs(small));
VERIFY_IS_APPROX(vsmall.blueNorm(), sqrt(size)*abs(small));
VERIFY_IS_APPROX(vsmall.hypotNorm(), sqrt(size)*abs(small));
// Test compilation of cwise() version
VERIFY_IS_APPROX(vrand.colwise().stableNorm(), vrand.colwise().norm());
VERIFY_IS_APPROX(vrand.colwise().blueNorm(), vrand.colwise().norm());
VERIFY_IS_APPROX(vrand.colwise().hypotNorm(), vrand.colwise().norm());
VERIFY_IS_APPROX(vrand.rowwise().stableNorm(), vrand.rowwise().norm());
VERIFY_IS_APPROX(vrand.rowwise().blueNorm(), vrand.rowwise().norm());
VERIFY_IS_APPROX(vrand.rowwise().hypotNorm(), vrand.rowwise().norm());
// test NaN, +inf, -inf
MatrixType v;
Index i = internal::random<Index>(0,rows-1);
Index j = internal::random<Index>(0,cols-1);
// NaN
{
v = vrand;
v(i,j) = std::numeric_limits<RealScalar>::quiet_NaN();
VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY((numext::isnan)(v.squaredNorm()));
VERIFY(!(numext::isfinite)(v.norm())); VERIFY((numext::isnan)(v.norm()));
VERIFY(!(numext::isfinite)(v.stableNorm())); VERIFY((numext::isnan)(v.stableNorm()));
VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY((numext::isnan)(v.blueNorm()));
VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY((numext::isnan)(v.hypotNorm()));
}
// +inf
{
v = vrand;
v(i,j) = std::numeric_limits<RealScalar>::infinity();
VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY(isPlusInf(v.squaredNorm()));
VERIFY(!(numext::isfinite)(v.norm())); VERIFY(isPlusInf(v.norm()));
VERIFY(!(numext::isfinite)(v.stableNorm()));
if(complex_real_product_ok){
VERIFY(isPlusInf(v.stableNorm()));
}
VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY(isPlusInf(v.blueNorm()));
VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY(isPlusInf(v.hypotNorm()));
}
// -inf
{
v = vrand;
v(i,j) = -std::numeric_limits<RealScalar>::infinity();
VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY(isPlusInf(v.squaredNorm()));
VERIFY(!(numext::isfinite)(v.norm())); VERIFY(isPlusInf(v.norm()));
VERIFY(!(numext::isfinite)(v.stableNorm()));
if(complex_real_product_ok) {
VERIFY(isPlusInf(v.stableNorm()));
}
VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY(isPlusInf(v.blueNorm()));
VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY(isPlusInf(v.hypotNorm()));
}
// mix
{
// Ensure unique indices otherwise inf may be overwritten by NaN.
Index i2, j2;
do {
i2 = internal::random<Index>(0,rows-1);
j2 = internal::random<Index>(0,cols-1);
} while (i2 == i && j2 == j);
v = vrand;
v(i,j) = -std::numeric_limits<RealScalar>::infinity();
v(i2,j2) = std::numeric_limits<RealScalar>::quiet_NaN();
VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY((numext::isnan)(v.squaredNorm()));
VERIFY(!(numext::isfinite)(v.norm())); VERIFY((numext::isnan)(v.norm()));
VERIFY(!(numext::isfinite)(v.stableNorm())); VERIFY((numext::isnan)(v.stableNorm()));
VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY((numext::isnan)(v.blueNorm()));
// hypot propagates inf over NaN.
VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY((numext::isinf)(v.hypotNorm()));
}
// stableNormalize[d]
{
VERIFY_IS_APPROX(vrand.stableNormalized(), vrand.normalized());
MatrixType vcopy(vrand);
vcopy.stableNormalize();
VERIFY_IS_APPROX(vcopy, vrand.normalized());
VERIFY_IS_APPROX((vrand.stableNormalized()).norm(), RealScalar(1));
VERIFY_IS_APPROX(vcopy.norm(), RealScalar(1));
VERIFY_IS_APPROX((vbig.stableNormalized()).norm(), RealScalar(1));
VERIFY_IS_APPROX((vsmall.stableNormalized()).norm(), RealScalar(1));
RealScalar big_scaling = ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
VERIFY_IS_APPROX(vbig/big_scaling, (vbig.stableNorm() * vbig.stableNormalized()).eval()/big_scaling);
VERIFY_IS_APPROX(vsmall, vsmall.stableNorm() * vsmall.stableNormalized());
}
}
template<typename Scalar>
void test_hypot()
{
typedef typename NumTraits<Scalar>::Real RealScalar;
Scalar factor = internal::random<Scalar>();
while(numext::abs2(factor)<RealScalar(1e-4))
factor = internal::random<Scalar>();
Scalar big = factor * ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
factor = internal::random<Scalar>();
while(numext::abs2(factor)<RealScalar(1e-4))
factor = internal::random<Scalar>();
Scalar small = factor * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4));
Scalar one (1),
zero (0),
sqrt2 (std::sqrt(2)),
nan (std::numeric_limits<RealScalar>::quiet_NaN());
Scalar a = internal::random<Scalar>(-1,1);
Scalar b = internal::random<Scalar>(-1,1);
VERIFY_IS_APPROX(numext::hypot(a,b),std::sqrt(numext::abs2(a)+numext::abs2(b)));
VERIFY_IS_EQUAL(numext::hypot(zero,zero), zero);
VERIFY_IS_APPROX(numext::hypot(one, one), sqrt2);
VERIFY_IS_APPROX(numext::hypot(big,big), sqrt2*numext::abs(big));
VERIFY_IS_APPROX(numext::hypot(small,small), sqrt2*numext::abs(small));
VERIFY_IS_APPROX(numext::hypot(small,big), numext::abs(big));
VERIFY((numext::isnan)(numext::hypot(nan,a)));
VERIFY((numext::isnan)(numext::hypot(a,nan)));
}
EIGEN_DECLARE_TEST(stable_norm)
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_3( test_hypot<double>() );
CALL_SUBTEST_4( test_hypot<float>() );
CALL_SUBTEST_5( test_hypot<std::complex<double> >() );
CALL_SUBTEST_6( test_hypot<std::complex<float> >() );
CALL_SUBTEST_1( stable_norm(Matrix<float, 1, 1>()) );
CALL_SUBTEST_2( stable_norm(Vector4d()) );
CALL_SUBTEST_3( stable_norm(VectorXd(internal::random<int>(10,2000))) );
CALL_SUBTEST_3( stable_norm(MatrixXd(internal::random<int>(10,200), internal::random<int>(10,200))) );
CALL_SUBTEST_4( stable_norm(VectorXf(internal::random<int>(10,2000))) );
CALL_SUBTEST_5( stable_norm(VectorXcd(internal::random<int>(10,2000))) );
CALL_SUBTEST_6( stable_norm(VectorXcf(internal::random<int>(10,2000))) );
}
}