mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-12 14:25:16 +08:00
bde6741641
Replaces `std::sqrt` with `complex_sqrt` for all platforms (previously `complex_sqrt` was only used for CUDA and MSVC), and implements custom `complex_rsqrt`. Also introduces `numext::rsqrt` to simplify implementation, and modifies `numext::hypot` to adhere to IEEE/IEC 60559 for special cases. The `complex_sqrt` and `complex_rsqrt` implementations were found to be significantly faster than `std::sqrt<std::complex<T>>` and `1/numext::sqrt<std::complex<T>>`. Benchmark file attached. ``` GCC 10, Intel Xeon, x86_64: --------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------- BM_Sqrt<std::complex<float>> 9.21 ns 9.21 ns 73225448 BM_StdSqrt<std::complex<float>> 17.1 ns 17.1 ns 40966545 BM_Sqrt<std::complex<double>> 8.53 ns 8.53 ns 81111062 BM_StdSqrt<std::complex<double>> 21.5 ns 21.5 ns 32757248 BM_Rsqrt<std::complex<float>> 10.3 ns 10.3 ns 68047474 BM_DivSqrt<std::complex<float>> 16.3 ns 16.3 ns 42770127 BM_Rsqrt<std::complex<double>> 11.3 ns 11.3 ns 61322028 BM_DivSqrt<std::complex<double>> 16.5 ns 16.5 ns 42200711 Clang 11, Intel Xeon, x86_64: --------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------- BM_Sqrt<std::complex<float>> 7.46 ns 7.45 ns 90742042 BM_StdSqrt<std::complex<float>> 16.6 ns 16.6 ns 42369878 BM_Sqrt<std::complex<double>> 8.49 ns 8.49 ns 81629030 BM_StdSqrt<std::complex<double>> 21.8 ns 21.7 ns 31809588 BM_Rsqrt<std::complex<float>> 8.39 ns 8.39 ns 82933666 BM_DivSqrt<std::complex<float>> 14.4 ns 14.4 ns 48638676 BM_Rsqrt<std::complex<double>> 9.83 ns 9.82 ns 70068956 BM_DivSqrt<std::complex<double>> 15.7 ns 15.7 ns 44487798 Clang 9, Pixel 2, aarch64: --------------------------------------------------------------------------- Benchmark Time CPU Iterations 
--------------------------------------------------------------------------- BM_Sqrt<std::complex<float>> 24.2 ns 24.1 ns 28616031 BM_StdSqrt<std::complex<float>> 104 ns 103 ns 6826926 BM_Sqrt<std::complex<double>> 31.8 ns 31.8 ns 22157591 BM_StdSqrt<std::complex<double>> 128 ns 128 ns 5437375 BM_Rsqrt<std::complex<float>> 31.9 ns 31.8 ns 22384383 BM_DivSqrt<std::complex<float>> 99.2 ns 98.9 ns 7250438 BM_Rsqrt<std::complex<double>> 46.0 ns 45.8 ns 15338689 BM_DivSqrt<std::complex<double>> 119 ns 119 ns 5898944 ```
245 lines
10 KiB
C++
245 lines
10 KiB
C++
// This file is part of Eigen, a lightweight C++ template library
|
|
// for linear algebra.
|
|
//
|
|
// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
//
|
|
// This Source Code Form is subject to the terms of the Mozilla
|
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
#include "main.h"
|
|
|
|
// Returns a by-value copy of x through a call that is marked EIGEN_DONT_INLINE.
// NOTE(review): presumably this forces the argument to be materialized as a
// plain T (e.g. dropping any extended intermediate precision) before it is
// used by the caller -- confirm against the call sites.
template<typename T>
EIGEN_DONT_INLINE T copy(const T& x)
{
  return T(x);
}
|
|
|
|
template<typename MatrixType> void stable_norm(const MatrixType& m)
|
|
{
|
|
/* this test covers the following files:
|
|
StableNorm.h
|
|
*/
|
|
using std::sqrt;
|
|
using std::abs;
|
|
typedef typename MatrixType::Scalar Scalar;
|
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
|
|
|
bool complex_real_product_ok = true;
|
|
|
|
// Check the basic machine-dependent constants.
|
|
{
|
|
int ibeta, it, iemin, iemax;
|
|
|
|
ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
|
|
it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
|
|
iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
|
|
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
|
|
|
|
VERIFY( (!(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5) || (it<=4 && ibeta <= 3 ) || it<2))
|
|
&& "the stable norm algorithm cannot be guaranteed on this computer");
|
|
|
|
Scalar inf = std::numeric_limits<RealScalar>::infinity();
|
|
if(NumTraits<Scalar>::IsComplex && (numext::isnan)(inf*RealScalar(1)) )
|
|
{
|
|
complex_real_product_ok = false;
|
|
static bool first = true;
|
|
if(first)
|
|
std::cerr << "WARNING: compiler mess up complex*real product, " << inf << " * " << 1.0 << " = " << inf*RealScalar(1) << std::endl;
|
|
first = false;
|
|
}
|
|
}
|
|
|
|
|
|
Index rows = m.rows();
|
|
Index cols = m.cols();
|
|
|
|
// get a non-zero random factor
|
|
Scalar factor = internal::random<Scalar>();
|
|
while(numext::abs2(factor)<RealScalar(1e-4))
|
|
factor = internal::random<Scalar>();
|
|
Scalar big = factor * ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
|
|
|
|
factor = internal::random<Scalar>();
|
|
while(numext::abs2(factor)<RealScalar(1e-4))
|
|
factor = internal::random<Scalar>();
|
|
Scalar small = factor * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4));
|
|
|
|
Scalar one(1);
|
|
|
|
MatrixType vzero = MatrixType::Zero(rows, cols),
|
|
vrand = MatrixType::Random(rows, cols),
|
|
vbig(rows, cols),
|
|
vsmall(rows,cols);
|
|
|
|
vbig.fill(big);
|
|
vsmall.fill(small);
|
|
|
|
VERIFY_IS_MUCH_SMALLER_THAN(vzero.norm(), static_cast<RealScalar>(1));
|
|
VERIFY_IS_APPROX(vrand.stableNorm(), vrand.norm());
|
|
VERIFY_IS_APPROX(vrand.blueNorm(), vrand.norm());
|
|
VERIFY_IS_APPROX(vrand.hypotNorm(), vrand.norm());
|
|
|
|
// test with expressions as input
|
|
VERIFY_IS_APPROX((one*vrand).stableNorm(), vrand.norm());
|
|
VERIFY_IS_APPROX((one*vrand).blueNorm(), vrand.norm());
|
|
VERIFY_IS_APPROX((one*vrand).hypotNorm(), vrand.norm());
|
|
VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).stableNorm(), vrand.norm());
|
|
VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).blueNorm(), vrand.norm());
|
|
VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).hypotNorm(), vrand.norm());
|
|
|
|
RealScalar size = static_cast<RealScalar>(m.size());
|
|
|
|
// test numext::isfinite
|
|
VERIFY(!(numext::isfinite)( std::numeric_limits<RealScalar>::infinity()));
|
|
VERIFY(!(numext::isfinite)(sqrt(-abs(big))));
|
|
|
|
// test overflow
|
|
VERIFY((numext::isfinite)(sqrt(size)*abs(big)));
|
|
VERIFY_IS_NOT_APPROX(sqrt(copy(vbig.squaredNorm())), abs(sqrt(size)*big)); // here the default norm must fail
|
|
VERIFY_IS_APPROX(vbig.stableNorm(), sqrt(size)*abs(big));
|
|
VERIFY_IS_APPROX(vbig.blueNorm(), sqrt(size)*abs(big));
|
|
VERIFY_IS_APPROX(vbig.hypotNorm(), sqrt(size)*abs(big));
|
|
|
|
// test underflow
|
|
VERIFY((numext::isfinite)(sqrt(size)*abs(small)));
|
|
VERIFY_IS_NOT_APPROX(sqrt(copy(vsmall.squaredNorm())), abs(sqrt(size)*small)); // here the default norm must fail
|
|
VERIFY_IS_APPROX(vsmall.stableNorm(), sqrt(size)*abs(small));
|
|
VERIFY_IS_APPROX(vsmall.blueNorm(), sqrt(size)*abs(small));
|
|
VERIFY_IS_APPROX(vsmall.hypotNorm(), sqrt(size)*abs(small));
|
|
|
|
// Test compilation of cwise() version
|
|
VERIFY_IS_APPROX(vrand.colwise().stableNorm(), vrand.colwise().norm());
|
|
VERIFY_IS_APPROX(vrand.colwise().blueNorm(), vrand.colwise().norm());
|
|
VERIFY_IS_APPROX(vrand.colwise().hypotNorm(), vrand.colwise().norm());
|
|
VERIFY_IS_APPROX(vrand.rowwise().stableNorm(), vrand.rowwise().norm());
|
|
VERIFY_IS_APPROX(vrand.rowwise().blueNorm(), vrand.rowwise().norm());
|
|
VERIFY_IS_APPROX(vrand.rowwise().hypotNorm(), vrand.rowwise().norm());
|
|
|
|
// test NaN, +inf, -inf
|
|
MatrixType v;
|
|
Index i = internal::random<Index>(0,rows-1);
|
|
Index j = internal::random<Index>(0,cols-1);
|
|
|
|
// NaN
|
|
{
|
|
v = vrand;
|
|
v(i,j) = std::numeric_limits<RealScalar>::quiet_NaN();
|
|
VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY((numext::isnan)(v.squaredNorm()));
|
|
VERIFY(!(numext::isfinite)(v.norm())); VERIFY((numext::isnan)(v.norm()));
|
|
VERIFY(!(numext::isfinite)(v.stableNorm())); VERIFY((numext::isnan)(v.stableNorm()));
|
|
VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY((numext::isnan)(v.blueNorm()));
|
|
VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY((numext::isnan)(v.hypotNorm()));
|
|
}
|
|
|
|
// +inf
|
|
{
|
|
v = vrand;
|
|
v(i,j) = std::numeric_limits<RealScalar>::infinity();
|
|
VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY(isPlusInf(v.squaredNorm()));
|
|
VERIFY(!(numext::isfinite)(v.norm())); VERIFY(isPlusInf(v.norm()));
|
|
VERIFY(!(numext::isfinite)(v.stableNorm()));
|
|
if(complex_real_product_ok){
|
|
VERIFY(isPlusInf(v.stableNorm()));
|
|
}
|
|
VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY(isPlusInf(v.blueNorm()));
|
|
VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY(isPlusInf(v.hypotNorm()));
|
|
}
|
|
|
|
// -inf
|
|
{
|
|
v = vrand;
|
|
v(i,j) = -std::numeric_limits<RealScalar>::infinity();
|
|
VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY(isPlusInf(v.squaredNorm()));
|
|
VERIFY(!(numext::isfinite)(v.norm())); VERIFY(isPlusInf(v.norm()));
|
|
VERIFY(!(numext::isfinite)(v.stableNorm()));
|
|
if(complex_real_product_ok) {
|
|
VERIFY(isPlusInf(v.stableNorm()));
|
|
}
|
|
VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY(isPlusInf(v.blueNorm()));
|
|
VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY(isPlusInf(v.hypotNorm()));
|
|
}
|
|
|
|
// mix
|
|
{
|
|
// Ensure unique indices otherwise inf may be overwritten by NaN.
|
|
Index i2, j2;
|
|
do {
|
|
i2 = internal::random<Index>(0,rows-1);
|
|
j2 = internal::random<Index>(0,cols-1);
|
|
} while (i2 == i && j2 == j);
|
|
v = vrand;
|
|
v(i,j) = -std::numeric_limits<RealScalar>::infinity();
|
|
v(i2,j2) = std::numeric_limits<RealScalar>::quiet_NaN();
|
|
VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY((numext::isnan)(v.squaredNorm()));
|
|
VERIFY(!(numext::isfinite)(v.norm())); VERIFY((numext::isnan)(v.norm()));
|
|
VERIFY(!(numext::isfinite)(v.stableNorm())); VERIFY((numext::isnan)(v.stableNorm()));
|
|
VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY((numext::isnan)(v.blueNorm()));
|
|
// hypot propagates inf over NaN.
|
|
VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY((numext::isinf)(v.hypotNorm()));
|
|
}
|
|
|
|
// stableNormalize[d]
|
|
{
|
|
VERIFY_IS_APPROX(vrand.stableNormalized(), vrand.normalized());
|
|
MatrixType vcopy(vrand);
|
|
vcopy.stableNormalize();
|
|
VERIFY_IS_APPROX(vcopy, vrand.normalized());
|
|
VERIFY_IS_APPROX((vrand.stableNormalized()).norm(), RealScalar(1));
|
|
VERIFY_IS_APPROX(vcopy.norm(), RealScalar(1));
|
|
VERIFY_IS_APPROX((vbig.stableNormalized()).norm(), RealScalar(1));
|
|
VERIFY_IS_APPROX((vsmall.stableNormalized()).norm(), RealScalar(1));
|
|
RealScalar big_scaling = ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
|
|
VERIFY_IS_APPROX(vbig/big_scaling, (vbig.stableNorm() * vbig.stableNormalized()).eval()/big_scaling);
|
|
VERIFY_IS_APPROX(vsmall, vsmall.stableNorm() * vsmall.stableNormalized());
|
|
}
|
|
}
|
|
|
|
template<typename Scalar>
|
|
void test_hypot()
|
|
{
|
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
|
Scalar factor = internal::random<Scalar>();
|
|
while(numext::abs2(factor)<RealScalar(1e-4))
|
|
factor = internal::random<Scalar>();
|
|
Scalar big = factor * ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
|
|
|
|
factor = internal::random<Scalar>();
|
|
while(numext::abs2(factor)<RealScalar(1e-4))
|
|
factor = internal::random<Scalar>();
|
|
Scalar small = factor * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4));
|
|
|
|
Scalar one (1),
|
|
zero (0),
|
|
sqrt2 (std::sqrt(2)),
|
|
nan (std::numeric_limits<RealScalar>::quiet_NaN());
|
|
|
|
Scalar a = internal::random<Scalar>(-1,1);
|
|
Scalar b = internal::random<Scalar>(-1,1);
|
|
VERIFY_IS_APPROX(numext::hypot(a,b),std::sqrt(numext::abs2(a)+numext::abs2(b)));
|
|
VERIFY_IS_EQUAL(numext::hypot(zero,zero), zero);
|
|
VERIFY_IS_APPROX(numext::hypot(one, one), sqrt2);
|
|
VERIFY_IS_APPROX(numext::hypot(big,big), sqrt2*numext::abs(big));
|
|
VERIFY_IS_APPROX(numext::hypot(small,small), sqrt2*numext::abs(small));
|
|
VERIFY_IS_APPROX(numext::hypot(small,big), numext::abs(big));
|
|
VERIFY((numext::isnan)(numext::hypot(nan,a)));
|
|
VERIFY((numext::isnan)(numext::hypot(a,nan)));
|
|
}
|
|
|
|
// Test entry point: runs the hypot checks and the stable-norm checks g_repeat
// times, for real and complex scalars in single and double precision.
EIGEN_DECLARE_TEST(stable_norm)
{
  for(int rep = 0; rep < g_repeat; ++rep)
  {
    // numext::hypot on each scalar type.
    CALL_SUBTEST_3( test_hypot<double>() );
    CALL_SUBTEST_4( test_hypot<float>() );
    CALL_SUBTEST_5( test_hypot<std::complex<double> >() );
    CALL_SUBTEST_6( test_hypot<std::complex<float> >() );

    // Fixed-size inputs.
    CALL_SUBTEST_1( stable_norm(Matrix<float, 1, 1>()) );
    CALL_SUBTEST_2( stable_norm(Vector4d()) );

    // Dynamic-size inputs with random dimensions.
    CALL_SUBTEST_3( stable_norm(VectorXd(internal::random<int>(10,2000))) );
    CALL_SUBTEST_3( stable_norm(MatrixXd(internal::random<int>(10,200), internal::random<int>(10,200))) );
    CALL_SUBTEST_4( stable_norm(VectorXf(internal::random<int>(10,2000))) );
    CALL_SUBTEST_5( stable_norm(VectorXcd(internal::random<int>(10,2000))) );
    CALL_SUBTEST_6( stable_norm(VectorXcf(internal::random<int>(10,2000))) );
  }
}
|