diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 3118e4e5e..22db46715 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -84,8 +84,7 @@ struct default_packet_traits HasErf = 0, HasErfc = 0, HasNdtri = 0, - HasI0e = 0, - HasI1e = 0, + HasBessel = 0, HasIGamma = 0, HasIGammaDerA = 0, HasGammaSampleDerAlpha = 0, diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 2e5f5e5bc..0472e9850 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -73,8 +73,7 @@ template<> struct packet_traits : default_packet_traits HasExpm1 = 1, HasExp = 1, HasNdtri = 1, - HasI0e = 1, - HasI1e = 1, + HasBessel = 1, HasSqrt = 1, HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 67e667640..589ccbb7a 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -99,8 +99,7 @@ template<> struct packet_traits : default_packet_traits HasExpm1 = 1, HasNdtri = 1, #endif - HasI0e = 1, - HasI1e = 1, + HasBessel = 1, HasExp = 1, HasSqrt = EIGEN_FAST_MATH, HasRsqrt = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h index bdbaa5362..5a66e2da9 100644 --- a/Eigen/src/Core/arch/GPU/PacketMath.h +++ b/Eigen/src/Core/arch/GPU/PacketMath.h @@ -45,8 +45,7 @@ template<> struct packet_traits : default_packet_traits HasErf = 1, HasErfc = 1, HasNdtri = 1, - HasI0e = 1, - HasI1e = 1, + HasBessel = 1, HasIGamma = 1, HasIGammaDerA = 1, HasGammaSampleDerAlpha = 1, @@ -80,8 +79,7 @@ template<> struct packet_traits : default_packet_traits HasErf = 1, HasErfc = 1, HasNdtri = 1, - HasI0e = 1, - HasI1e = 1, + HasBessel = 1, HasIGamma = 1, HasIGammaDerA = 1, HasGammaSampleDerAlpha = 1, diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h 
index 0aadefab7..b48d70afd 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -114,8 +114,7 @@ template<> struct packet_traits : default_packet_traits HasExpm1 = 1, HasNdtri = 1, HasExp = 1, - HasI0e = 1, - HasI1e = 1, + HasBessel = 1, HasSqrt = 1, HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 749945031..953f52af0 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -215,13 +215,26 @@ template struct scalar_digamma_op; template struct scalar_erf_op; template struct scalar_erfc_op; template struct scalar_ndtri_op; -template struct scalar_i0e_op; -template struct scalar_i1e_op; template struct scalar_igamma_op; template struct scalar_igammac_op; template struct scalar_zeta_op; template struct scalar_betainc_op; +// Bessel functions in SpecialFunctions module +template struct scalar_bessel_i0_op; +template struct scalar_bessel_i0e_op; +template struct scalar_bessel_i1_op; +template struct scalar_bessel_i1e_op; +template struct scalar_bessel_j0_op; +template struct scalar_bessel_y0_op; +template struct scalar_bessel_j1_op; +template struct scalar_bessel_y1_op; +template struct scalar_bessel_k0_op; +template struct scalar_bessel_k0e_op; +template struct scalar_bessel_k1_op; +template struct scalar_bessel_k1e_op; + + } // end namespace internal struct IOFormat; diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 05739a31b..0f9ce6d06 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -609,8 +609,28 @@ template void packetmath_real() CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); CHECK_CWISE1_IF(PacketTraits::HasSqrt, Scalar(1)/std::sqrt, internal::prsqrt); CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); - CHECK_CWISE1_IF(PacketTraits::HasI0e, numext::i0e, internal::pi0e); - CHECK_CWISE1_IF(PacketTraits::HasI1e, 
numext::i1e, internal::pi1e); + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::i0, internal::pi0); + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::i0e, internal::pi0e); + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::i1, internal::pi1); + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::i1e, internal::pi1e); + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::j0, internal::pj0); + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::j1, internal::pj1); + + // Use a smaller data range for the positive bessel operations as these + // can have much more error at very small and very large values. + for (int i=0; i(0.01,1) * std::pow( + Scalar(10), internal::random(-1,2)); + data2[i] = internal::random(0.01,1) * std::pow( + Scalar(10), internal::random(-1,2)); + } + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::y0, internal::py0); + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::y1, internal::py1); + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::k0, internal::pk0); + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::k0e, internal::pk0e); + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::k1, internal::pk1); + CHECK_CWISE1_IF(PacketTraits::HasBessel, numext::k1e, internal::pk1e); + #if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L) CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); @@ -945,7 +965,7 @@ EIGEN_DECLARE_TEST(packetmath) { g_first_pass = true; for(int i = 0; i < g_repeat; i++) { - + CALL_SUBTEST_1( runner::run() ); CALL_SUBTEST_2( runner::run() ); CALL_SUBTEST_3( runner::run() ); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index bcb0daf30..48e0217e5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -136,15 +136,75 @@ class TensorBase } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const 
Derived> - i0e() const { - return unaryExpr(internal::scalar_i0e_op()); + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + i0() const { + return unaryExpr(internal::scalar_bessel_i0_op()); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + i0e() const { + return unaryExpr(internal::scalar_bessel_i0e_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + i1() const { + return unaryExpr(internal::scalar_bessel_i1_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> i1e() const { - return unaryExpr(internal::scalar_i1e_op()); + return unaryExpr(internal::scalar_bessel_i1e_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + j0() const { + return unaryExpr(internal::scalar_bessel_j0_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + y0() const { + return unaryExpr(internal::scalar_bessel_y0_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + j1() const { + return unaryExpr(internal::scalar_bessel_j1_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + y1() const { + return unaryExpr(internal::scalar_bessel_y1_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + k0() const { + return unaryExpr(internal::scalar_bessel_k0_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + k0e() const { + return unaryExpr(internal::scalar_bessel_k0e_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + k1() const { + return unaryExpr(internal::scalar_bessel_k1_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + k1e() const { + return 
unaryExpr(internal::scalar_bessel_k1e_op()); } // igamma(a = this, x = other) diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions index a5abd407d..ffa348eb4 100644 --- a/unsupported/Eigen/SpecialFunctions +++ b/unsupported/Eigen/SpecialFunctions @@ -37,8 +37,20 @@ namespace Eigen { * - polygamma * - zeta * - betainc + * + * Bessel Functions + * - i0 * - i0e + * - i1 * - i1e + * - j0 + * - j1 + * - y0 + * - y1 + * - k0 + * - k0e + * - k1 + * - k1e * * \code * #include @@ -48,6 +60,11 @@ namespace Eigen { } +#include "src/SpecialFunctions/BesselFunctionsImpl.h" +#include "src/SpecialFunctions/BesselFunctionsPacketMath.h" +#include "src/SpecialFunctions/BesselFunctionsHalf.h" +#include "src/SpecialFunctions/BesselFunctionsFunctors.h" +#include "src/SpecialFunctions/BesselFunctionsArrayAPI.h" #include "src/SpecialFunctions/SpecialFunctionsImpl.h" #include "src/SpecialFunctions/SpecialFunctionsPacketMath.h" #include "src/SpecialFunctions/SpecialFunctionsHalf.h" diff --git a/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h new file mode 100644 index 000000000..8f96c2ae7 --- /dev/null +++ b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h @@ -0,0 +1,286 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_BESSELFUNCTIONS_ARRAYAPI_H +#define EIGEN_BESSELFUNCTIONS_ARRAYAPI_H + +namespace Eigen { + +/** \returns an expression of the coefficient-wise i0(\a x) to the given + * arrays. + * + * It returns the modified Bessel function of the first kind of order zero. 
+ * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of i0(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::i0() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i0_op, const Derived> +i0(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i0_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise i0e(\a x) to the given + * arrays. + * + * It returns the exponentially scaled modified Bessel + * function of the first kind of order zero. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of i0e(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::i0e() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i0e_op, const Derived> +i0e(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i0e_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise i1(\a x) to the given + * arrays. + * + * It returns the modified Bessel function of the first kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of i1(T) for + * any scalar type T to be supported. 
+ * + * \sa ArrayBase::i1() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i1_op, const Derived> +i1(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i1_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise i1e(\a x) to the given + * arrays. + * + * It returns the exponentially scaled modified Bessel + * function of the first kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of i1e(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::i1e() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i1e_op, const Derived> +i1e(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_i1e_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise k0(\a x) to the given + * arrays. + * + * It returns the modified Bessel function of the second kind of order zero. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of k0(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::k0() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k0_op, const Derived> +k0(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k0_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise k0e(\a x) to the given + * arrays. + * + * It returns the exponentially scaled modified Bessel + * function of the second kind of order zero. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. 
To support + * other scalar types, the user has to provide implementations of k0e(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::k0e() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k0e_op, const Derived> +k0e(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k0e_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise k1(\a x) to the given + * arrays. + * + * It returns the modified Bessel function of the second kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of k1(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::k1() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k1_op, const Derived> +k1(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k1_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise k1e(\a x) to the given + * arrays. + * + * It returns the exponentially scaled modified Bessel + * function of the second kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of k1e(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::k1e() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k1e_op, const Derived> +k1e(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_k1e_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise j0(\a x) to the given + * arrays. + * + * It returns the Bessel function of the first kind of order zero. 
+ * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of j0(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::j0() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_j0_op, const Derived> +j0(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_j0_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise y0(\a x) to the given + * arrays. + * + * It returns the Bessel function of the second kind of order zero. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of y0(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::y0() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_y0_op, const Derived> +y0(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_y0_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise j1(\a x) to the given + * arrays. + * + * It returns the Bessel function of the first kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of j1(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::j1() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_j1_op, const Derived> +j1(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_j1_op, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise y1(\a x) to the given + * arrays. 
+ * + * It returns the Bessel function of the second kind of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of y1(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::y1() + */ +template +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_y1_op, const Derived> +y1(const Eigen::ArrayBase& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_bessel_y1_op, + const Derived>(x.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_BESSELFUNCTIONS_ARRAYAPI_H diff --git a/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h new file mode 100644 index 000000000..e57d5042b --- /dev/null +++ b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h @@ -0,0 +1,357 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BESSELFUNCTIONS_FUNCTORS_H +#define EIGEN_BESSELFUNCTIONS_FUNCTORS_H + +namespace Eigen { + +namespace internal { + +/** \internal + * \brief Template functor to compute the modified Bessel function of the first + * kind of order zero. 
+ * \sa class CwiseUnaryOp, Cwise::i0() + */ +template +struct scalar_bessel_i0_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i0_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::i0; + return i0(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pi0(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=20 is computed. + // The cost is N multiplications and 2N additions. We also add + // the cost of an additional exp over i0e. + Cost = 28 * NumTraits::MulCost + 48 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the exponentially scaled modified Bessel + * function of the first kind of order zero + * \sa class CwiseUnaryOp, Cwise::i0e() + */ +template +struct scalar_bessel_i0e_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i0e_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::i0e; + return i0e(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pi0e(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=20 is computed. + // The cost is N multiplications and 2N additions. 
+ Cost = 20 * NumTraits::MulCost + 40 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the modified Bessel function of the first + * kind of order one + * \sa class CwiseUnaryOp, Cwise::i1() + */ +template +struct scalar_bessel_i1_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i1_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::i1; + return i1(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pi1(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=20 is computed. + // The cost is N multiplications and 2N additions. We also add + // the cost of an additional exp over i1e. + Cost = 28 * NumTraits::MulCost + 48 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the exponentially scaled modified Bessel + * function of the first kind of order one + * \sa class CwiseUnaryOp, Cwise::i1e() + */ +template +struct scalar_bessel_i1e_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i1e_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::i1e; + return i1e(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pi1e(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=20 is computed. + // The cost is N multiplications and 2N additions. 
+ Cost = 20 * NumTraits::MulCost + 40 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the Bessel function of the first kind of + * order zero + * \sa class CwiseUnaryOp, Cwise::j0() + */ +template +struct scalar_bessel_j0_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_j0_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::j0; + return j0(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pj0(x); + } +}; +template +struct functor_traits > { + enum { + // 6 polynomials of order ~N=8 are computed. + // The cost is N multiplications and N additions each, along with a + // sine, cosine and rsqrt cost. + Cost = 63 * NumTraits::MulCost + 48 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the Bessel function of the second kind of + * order zero + * \sa class CwiseUnaryOp, Cwise::y0() + */ +template +struct scalar_bessel_y0_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_y0_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::y0; + return y0(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::py0(x); + } +}; +template +struct functor_traits > { + enum { + // 6 polynomials of order ~N=8 are computed. + // The cost is N multiplications and N additions each, along with a + // sine, cosine, rsqrt and j0 cost. 
+ Cost = 126 * NumTraits::MulCost + 96 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the Bessel function of the first kind of + * order one + * \sa class CwiseUnaryOp, Cwise::j1() + */ +template +struct scalar_bessel_j1_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_j1_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::j1; + return j1(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pj1(x); + } +}; +template +struct functor_traits > { + enum { + // 6 polynomials of order ~N=8 are computed. + // The cost is N multiplications and N additions each, along with a + // sine, cosine and rsqrt cost. + Cost = 63 * NumTraits::MulCost + 48 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the Bessel function of the second kind of + * order one + * \sa class CwiseUnaryOp, Cwise::y1() + */ +template +struct scalar_bessel_y1_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_y1_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::y1; + return y1(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::py1(x); + } +}; +template +struct functor_traits > { + enum { + // 6 polynomials of order ~N=8 are computed. + // The cost is N multiplications and N additions each, along with a + // sine, cosine, rsqrt and j1 cost. 
+ Cost = 126 * NumTraits::MulCost + 96 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the modified Bessel function of the second + * kind of order zero + * \sa class CwiseUnaryOp, Cwise::k0() + */ +template +struct scalar_bessel_k0_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k0_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::k0; + return k0(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pk0(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=10 is computed. + // The cost is N multiplications and 2N additions. In addition we compute + // i0, a log, exp and prsqrt and sin and cos. + Cost = 68 * NumTraits::MulCost + 88 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the exponentially scaled modified Bessel + * function of the second kind of order zero + * \sa class CwiseUnaryOp, Cwise::k0e() + */ +template +struct scalar_bessel_k0e_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k0e_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::k0e; + return k0e(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pk0e(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=10 is computed. + // The cost is N multiplications and 2N additions. In addition we compute + // i0, a log, exp and prsqrt and sin and cos. 
+ Cost = 68 * NumTraits::MulCost + 88 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the modified Bessel function of the + * second kind of order one + * \sa class CwiseUnaryOp, Cwise::k1() + */ +template +struct scalar_bessel_k1_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k1_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::k1; + return k1(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pk1(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=10 is computed. + // The cost is N multiplications and 2N additions. In addition we compute + // i1, a log, exp and prsqrt and sin and cos. + Cost = 68 * NumTraits::MulCost + 88 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + +/** \internal + * \brief Template functor to compute the exponentially scaled modified Bessel + * function of the second kind of order one + * \sa class CwiseUnaryOp, Cwise::k1e() + */ +template +struct scalar_bessel_k1e_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k1e_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { + using numext::k1e; + return k1e(x); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return internal::pk1e(x); + } +}; +template +struct functor_traits > { + enum { + // On average, a Chebyshev polynomial of order N=10 is computed. + // The cost is N multiplications and 2N additions. In addition we compute + // i1, a log, exp and prsqrt and sin and cos. 
+ Cost = 68 * NumTraits::MulCost + 88 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBessel + }; +}; + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BESSELFUNCTIONS_FUNCTORS_H diff --git a/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h new file mode 100644 index 000000000..9aad7ac96 --- /dev/null +++ b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h @@ -0,0 +1,66 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BESSELFUNCTIONS_HALF_H +#define EIGEN_BESSELFUNCTIONS_HALF_H + +namespace Eigen { +namespace numext { + +#if EIGEN_HAS_C99_MATH +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i0(const Eigen::half& x) { + return Eigen::half(Eigen::numext::i0(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i0e(const Eigen::half& x) { + return Eigen::half(Eigen::numext::i0e(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i1(const Eigen::half& x) { + return Eigen::half(Eigen::numext::i1(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i1e(const Eigen::half& x) { + return Eigen::half(Eigen::numext::i1e(static_cast(x))); +} +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half j0(const Eigen::half& x) { + return Eigen::half(Eigen::numext::j0(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half j1(const Eigen::half& x) { + return Eigen::half(Eigen::numext::j1(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half y0(const Eigen::half& x) { + return Eigen::half(Eigen::numext::y0(static_cast(x))); +} 
+template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half y1(const Eigen::half& x) { + return Eigen::half(Eigen::numext::y1(static_cast(x))); +} +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half k0(const Eigen::half& x) { + return Eigen::half(Eigen::numext::k0(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half k0e(const Eigen::half& x) { + return Eigen::half(Eigen::numext::k0e(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half k1(const Eigen::half& x) { + return Eigen::half(Eigen::numext::k1(static_cast(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half k1e(const Eigen::half& x) { + return Eigen::half(Eigen::numext::k1e(static_cast(x))); +} +#endif + +} // end namespace numext +} // end namespace Eigen + +#endif // EIGEN_BESSELFUNCTIONS_HALF_H diff --git a/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h new file mode 100644 index 000000000..b279687c2 --- /dev/null +++ b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h @@ -0,0 +1,1959 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BESSEL_FUNCTIONS_H +#define EIGEN_BESSEL_FUNCTIONS_H + +namespace Eigen { +namespace internal { + +// Parts of this code are based on the Cephes Math Library. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1992, 2000 by Stephen L. 
Moshier +// +// Permission has been kindly provided by the original author +// to incorporate the Cephes software into the Eigen codebase: +// +// From: Stephen Moshier +// To: Eugene Brevdo +// Subject: Re: Permission to wrap several cephes functions in Eigen +// +// Hello Eugene, +// +// Thank you for writing. +// +// If your licensing is similar to BSD, the formal way that has been +// handled is simply to add a statement to the effect that you are incorporating +// the Cephes software by permission of the author. +// +// Good luck with your project, +// Steve + + +/**************************************************************************** + * Implementation of Bessel function, based on Cephes * + ****************************************************************************/ + +template +struct i0e_retval { + typedef Scalar type; +}; + +template +struct generic_i0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_i0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* i0ef.c + * + * Modified Bessel function of order zero, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * float x, y, i0ef(); + * + * y = i0ef( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order zero of the argument. + * + * The function is defined as i0e(x) = exp(-|x|) j0( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 100000 3.7e-7 7.0e-8 + * See i0f(). 
+ * + */ + + const float A[] = {-1.30002500998624804212E-8f, 6.04699502254191894932E-8f, + -2.67079385394061173391E-7f, 1.11738753912010371815E-6f, + -4.41673835845875056359E-6f, 1.64484480707288970893E-5f, + -5.75419501008210370398E-5f, 1.88502885095841655729E-4f, + -5.76375574538582365885E-4f, 1.63947561694133579842E-3f, + -4.32430999505057594430E-3f, 1.05464603945949983183E-2f, + -2.37374148058994688156E-2f, 4.93052842396707084878E-2f, + -9.49010970480476444210E-2f, 1.71620901522208775349E-1f, + -3.04682672343198398683E-1f, 6.76795274409476084995E-1f}; + + const float B[] = {3.39623202570838634515E-9f, 2.26666899049817806459E-8f, + 2.04891858946906374183E-7f, 2.89137052083475648297E-6f, + 6.88975834691682398426E-5f, 3.36911647825569408990E-3f, + 8.04490411014108831608E-1f}; + T y = pabs(x); + T y_le_eight = internal::pchebevl::run( + pmadd(pset1(0.5f), y, pset1(-2.0f)), A); + T y_gt_eight = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(32.0f), y), pset1(2.0f)), B), + prsqrt(y)); + // TODO: Perhaps instead check whether all packet elements are in + // [-8, 8] and evaluate a branch based off of that. It's possible + // in practice most elements are in this region. + return pselect(pcmp_le(y, pset1(8.0f)), y_le_eight, y_gt_eight); + } +}; + +template +struct generic_i0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* i0e.c + * + * Modified Bessel function of order zero, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * double x, y, i0e(); + * + * y = i0e( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order zero of the argument. + * + * The function is defined as i0e(x) = exp(-|x|) j0( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 30000 5.4e-16 1.2e-16 + * See i0(). 
+ * + */ + + const double A[] = {-4.41534164647933937950E-18, 3.33079451882223809783E-17, + -2.43127984654795469359E-16, 1.71539128555513303061E-15, + -1.16853328779934516808E-14, 7.67618549860493561688E-14, + -4.85644678311192946090E-13, 2.95505266312963983461E-12, + -1.72682629144155570723E-11, 9.67580903537323691224E-11, + -5.18979560163526290666E-10, 2.65982372468238665035E-9, + -1.30002500998624804212E-8, 6.04699502254191894932E-8, + -2.67079385394061173391E-7, 1.11738753912010371815E-6, + -4.41673835845875056359E-6, 1.64484480707288970893E-5, + -5.75419501008210370398E-5, 1.88502885095841655729E-4, + -5.76375574538582365885E-4, 1.63947561694133579842E-3, + -4.32430999505057594430E-3, 1.05464603945949983183E-2, + -2.37374148058994688156E-2, 4.93052842396707084878E-2, + -9.49010970480476444210E-2, 1.71620901522208775349E-1, + -3.04682672343198398683E-1, 6.76795274409476084995E-1}; + const double B[] = { + -7.23318048787475395456E-18, -4.83050448594418207126E-18, + 4.46562142029675999901E-17, 3.46122286769746109310E-17, + -2.82762398051658348494E-16, -3.42548561967721913462E-16, + 1.77256013305652638360E-15, 3.81168066935262242075E-15, + -9.55484669882830764870E-15, -4.15056934728722208663E-14, + 1.54008621752140982691E-14, 3.85277838274214270114E-13, + 7.18012445138366623367E-13, -1.79417853150680611778E-12, + -1.32158118404477131188E-11, -3.14991652796324136454E-11, + 1.18891471078464383424E-11, 4.94060238822496958910E-10, + 3.39623202570838634515E-9, 2.26666899049817806459E-8, + 2.04891858946906374183E-7, 2.89137052083475648297E-6, + 6.88975834691682398426E-5, 3.36911647825569408990E-3, + 8.04490411014108831608E-1}; + T y = pabs(x); + T y_le_eight = internal::pchebevl::run( + pmadd(pset1(0.5), y, pset1(-2.0)), A); + T y_gt_eight = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(32.0), y), pset1(2.0)), B), + prsqrt(y)); + // TODO: Perhaps instead check whether all packet elements are in + // [-8, 8] and evaluate a branch based off of that. 
It's possible + // in practice most elements are in this region. + return pselect(pcmp_le(y, pset1(8.0)), y_le_eight, y_gt_eight); + } +}; + +template +struct i0e_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_i0e::run(x); + } +}; + +template +struct i0_retval { + typedef Scalar type; +}; + +template +struct generic_i0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + return pmul( + pexp(pabs(x)), + generic_i0e::run(x)); + } +}; + +template +struct i0_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_i0::run(x); + } +}; + +template +struct i1e_retval { + typedef Scalar type; +}; + +template +struct generic_i1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_i1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* i1ef.c + * + * Modified Bessel function of order one, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * float x, y, i1ef(); + * + * y = i1ef( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order one of the argument. + * + * The function is defined as i1e(x) = -i exp(-|x|) j1( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 1.5e-6 1.5e-7 + * See i1().
+ * + */ + const float A[] = {9.38153738649577178388E-9f, -4.44505912879632808065E-8f, + 2.00329475355213526229E-7f, -8.56872026469545474066E-7f, + 3.47025130813767847674E-6f, -1.32731636560394358279E-5f, + 4.78156510755005422638E-5f, -1.61760815825896745588E-4f, + 5.12285956168575772895E-4f, -1.51357245063125314899E-3f, + 4.15642294431288815669E-3f, -1.05640848946261981558E-2f, + 2.47264490306265168283E-2f, -5.29459812080949914269E-2f, + 1.02643658689847095384E-1f, -1.76416518357834055153E-1f, + 2.52587186443633654823E-1f}; + + const float B[] = {-3.83538038596423702205E-9f, -2.63146884688951950684E-8f, + -2.51223623787020892529E-7f, -3.88256480887769039346E-6f, + -1.10588938762623716291E-4f, -9.76109749136146840777E-3f, + 7.78576235018280120474E-1f}; + + + T y = pabs(x); + T y_le_eight = pmul(y, internal::pchebevl::run( + pmadd(pset1(0.5f), y, pset1(-2.0f)), A)); + T y_gt_eight = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(32.0f), y), + pset1(2.0f)), B), + prsqrt(y)); + // TODO: Perhaps instead check whether all packet elements are in + // [-8, 8] and evaluate a branch based off of that. It's possible + // in practice most elements are in this region. + y = pselect(pcmp_le(y, pset1(8.0f)), y_le_eight, y_gt_eight); + return pselect(pcmp_lt(x, pset1(0.0f)), -y, y); + } +}; + +template +struct generic_i1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* i1e.c + * + * Modified Bessel function of order one, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * double x, y, i1e(); + * + * y = i1e( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order one of the argument. + * + * The function is defined as i1e(x) = -i exp(-|x|) j1( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 2.0e-15 2.0e-16 + * See i1().
+ * + */ + const double A[] = {2.77791411276104639959E-18, -2.11142121435816608115E-17, + 1.55363195773620046921E-16, -1.10559694773538630805E-15, + 7.60068429473540693410E-15, -5.04218550472791168711E-14, + 3.22379336594557470981E-13, -1.98397439776494371520E-12, + 1.17361862988909016308E-11, -6.66348972350202774223E-11, + 3.62559028155211703701E-10, -1.88724975172282928790E-9, + 9.38153738649577178388E-9, -4.44505912879632808065E-8, + 2.00329475355213526229E-7, -8.56872026469545474066E-7, + 3.47025130813767847674E-6, -1.32731636560394358279E-5, + 4.78156510755005422638E-5, -1.61760815825896745588E-4, + 5.12285956168575772895E-4, -1.51357245063125314899E-3, + 4.15642294431288815669E-3, -1.05640848946261981558E-2, + 2.47264490306265168283E-2, -5.29459812080949914269E-2, + 1.02643658689847095384E-1, -1.76416518357834055153E-1, + 2.52587186443633654823E-1}; + const double B[] = { + 7.51729631084210481353E-18, 4.41434832307170791151E-18, + -4.65030536848935832153E-17, -3.20952592199342395980E-17, + 2.96262899764595013876E-16, 3.30820231092092828324E-16, + -1.88035477551078244854E-15, -3.81440307243700780478E-15, + 1.04202769841288027642E-14, 4.27244001671195135429E-14, + -2.10154184277266431302E-14, -4.08355111109219731823E-13, + -7.19855177624590851209E-13, 2.03562854414708950722E-12, + 1.41258074366137813316E-11, 3.25260358301548823856E-11, + -1.89749581235054123450E-11, -5.58974346219658380687E-10, + -3.83538038596423702205E-9, -2.63146884688951950684E-8, + -2.51223623787020892529E-7, -3.88256480887769039346E-6, + -1.10588938762623716291E-4, -9.76109749136146840777E-3, + 7.78576235018280120474E-1}; + T y = pabs(x); + T y_le_eight = pmul(y, internal::pchebevl::run( + pmadd(pset1(0.5), y, pset1(-2.0)), A)); + T y_gt_eight = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(32.0), y), + pset1(2.0)), B), + prsqrt(y)); + // TODO: Perhaps instead check whether all packet elements are in + // [-8, 8] and evaluate a branch based off of that. 
It's possible + // in practice most elements are in this region. + y = pselect(pcmp_le(y, pset1(8.0)), y_le_eight, y_gt_eight); + return pselect(pcmp_lt(x, pset1(0.0f)), -y, y); + } +}; + +template +struct i1e_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_i1e::run(x); + } +}; + +template +struct i1_retval { + typedef Scalar type; +}; + +template +struct generic_i1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + return pmul( + pexp(pabs(x)), + generic_i1e::run(x)); + } +}; + +template +struct i1_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_i1::run(x); + } +}; + +template +struct k0e_retval { + typedef Scalar type; +}; + +template +struct generic_k0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_k0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k0ef.c + * Modified Bessel function, third kind, order zero, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * float x, y, k0ef(); + * + * y = k0ef( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of the third kind of order zero of the argument. + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 8.1e-7 7.8e-8 + * See k0(). 
+ * + */ + + const float A[] = {1.90451637722020886025E-9f, 2.53479107902614945675E-7f, + 2.28621210311945178607E-5f, 1.26461541144692592338E-3f, + 3.59799365153615016266E-2f, 3.44289899924628486886E-1f, + -5.35327393233902768720E-1f}; + + const float B[] = {-1.69753450938905987466E-9f, 8.57403401741422608519E-9f, + -4.66048989768794782956E-8f, 2.76681363944501510342E-7f, + -1.83175552271911948767E-6f, 1.39498137188764993662E-5f, + -1.28495495816278026384E-4f, 1.56988388573005337491E-3f, + -3.14481013119645005427E-2f, 2.44030308206595545468E0f}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A); + x_le_two = pmadd( + generic_i0::run(x), pmul( + pset1(-1.0), plog(pmul(pset1(0.5), x))), x_le_two); + x_le_two = pmul(pexp(x), x_le_two); + T x_gt_two = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x)); + return pselect( + pcmp_le(x, pset1(0.0)), + MAXNUM, + pselect(pcmp_le(x, two), x_le_two, x_gt_two)); + } +}; + +template +struct generic_k0e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k0e.c + * Modified Bessel function, third kind, order zero, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * double x, y, k0e(); + * + * y = k0e( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of the third kind of order zero of the argument. + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 1.4e-15 1.4e-16 + * See k0(). 
+ * + */ + + const double A[] = { + 1.37446543561352307156E-16, + 4.25981614279661018399E-14, + 1.03496952576338420167E-11, + 1.90451637722020886025E-9, + 2.53479107902614945675E-7, + 2.28621210311945178607E-5, + 1.26461541144692592338E-3, + 3.59799365153615016266E-2, + 3.44289899924628486886E-1, + -5.35327393233902768720E-1}; + const double B[] = { + 5.30043377268626276149E-18, -1.64758043015242134646E-17, + 5.21039150503902756861E-17, -1.67823109680541210385E-16, + 5.51205597852431940784E-16, -1.84859337734377901440E-15, + 6.34007647740507060557E-15, -2.22751332699166985548E-14, + 8.03289077536357521100E-14, -2.98009692317273043925E-13, + 1.14034058820847496303E-12, -4.51459788337394416547E-12, + 1.85594911495471785253E-11, -7.95748924447710747776E-11, + 3.57739728140030116597E-10, -1.69753450938905987466E-9, + 8.57403401741422608519E-9, -4.66048989768794782956E-8, + 2.76681363944501510342E-7, -1.83175552271911948767E-6, + 1.39498137188764993662E-5, -1.28495495816278026384E-4, + 1.56988388573005337491E-3, -3.14481013119645005427E-2, + 2.44030308206595545468E0 + }; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A); + x_le_two = pmadd( + generic_i0::run(x), pmul( + pset1(-1.0), plog(pmul(pset1(0.5), x))), x_le_two); + x_le_two = pmul(pexp(x), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x)); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct k0e_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_k0e::run(x); + } +}; + +template +struct k0_retval { + typedef Scalar type; +}; + +template +struct generic_k0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + 
return ScalarType(0); + } +}; + +template +struct generic_k0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k0f.c + * Modified Bessel function, third kind, order zero + * + * + * + * SYNOPSIS: + * + * float x, y, k0f(); + * + * y = k0f( x ); + * + * + * + * DESCRIPTION: + * + * Returns modified Bessel function of the third kind + * of order zero of the argument. + * + * The range is partitioned into the two intervals [0,2] and + * (2, infinity). Chebyshev polynomial expansions are employed + * in each interval. + * + * + * + * ACCURACY: + * + * Tested at 2000 random points between 0 and 8. Peak absolute + * error (relative when K0 > 1) was 1.46e-14; rms, 4.26e-15. + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 7.8e-7 8.5e-8 + * + * ERROR MESSAGES: + * + * message condition value returned + * K0 domain x <= 0 MAXNUM + * + */ + + const float A[] = {1.90451637722020886025E-9f, 2.53479107902614945675E-7f, + 2.28621210311945178607E-5f, 1.26461541144692592338E-3f, + 3.59799365153615016266E-2f, 3.44289899924628486886E-1f, + -5.35327393233902768720E-1f}; + + const float B[] = {-1.69753450938905987466E-9f, 8.57403401741422608519E-9f, + -4.66048989768794782956E-8f, 2.76681363944501510342E-7f, + -1.83175552271911948767E-6f, 1.39498137188764993662E-5f, + -1.28495495816278026384E-4f, 1.56988388573005337491E-3f, + -3.14481013119645005427E-2f, 2.44030308206595545468E0f}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A); + x_le_two = pmadd( + generic_i0::run(x), pmul( + pset1(-1.0), plog(pmul(pset1(0.5), x))), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + pmul( + pexp(-x), + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B)), + prsqrt(x)); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct generic_k0 { + EIGEN_DEVICE_FUNC +
static EIGEN_STRONG_INLINE T run(const T& x) { + /* + * + * Modified Bessel function, third kind, order zero + * (not exponentially scaled) + * + * + * + * SYNOPSIS: + * + * double x, y, k0(); + * + * y = k0( x ); + * + * + * + * DESCRIPTION: + * + * Returns the modified Bessel function + * of the third kind of order zero of the argument. + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 1.4e-15 1.4e-16 + * See k0(). + * + */ + const double A[] = { + 1.37446543561352307156E-16, + 4.25981614279661018399E-14, + 1.03496952576338420167E-11, + 1.90451637722020886025E-9, + 2.53479107902614945675E-7, + 2.28621210311945178607E-5, + 1.26461541144692592338E-3, + 3.59799365153615016266E-2, + 3.44289899924628486886E-1, + -5.35327393233902768720E-1}; + const double B[] = { + 5.30043377268626276149E-18, -1.64758043015242134646E-17, + 5.21039150503902756861E-17, -1.67823109680541210385E-16, + 5.51205597852431940784E-16, -1.84859337734377901440E-15, + 6.34007647740507060557E-15, -2.22751332699166985548E-14, + 8.03289077536357521100E-14, -2.98009692317273043925E-13, + 1.14034058820847496303E-12, -4.51459788337394416547E-12, + 1.85594911495471785253E-11, -7.95748924447710747776E-11, + 3.57739728140030116597E-10, -1.69753450938905987466E-9, + 8.57403401741422608519E-9, -4.66048989768794782956E-8, + 2.76681363944501510342E-7, -1.83175552271911948767E-6, + 1.39498137188764993662E-5, -1.28495495816278026384E-4, + 1.56988388573005337491E-3, -3.14481013119645005427E-2, + 2.44030308206595545468E0 + }; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A); + x_le_two = pmadd( + generic_i0::run(x), pmul( + pset1(-1.0), plog(pmul(pset1(0.5), x))), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + pmul( + pexp(-x), + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B)), +
prsqrt(x)); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct k0_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_k0::run(x); + } +}; + +template +struct k1e_retval { + typedef Scalar type; +}; + +template +struct generic_k1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_k1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k1ef.c + * + * Modified Bessel function, third kind, order one, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * float x, y, k1ef(); + * + * y = k1ef( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of the third kind of order one of the argument: + * + * k1e(x) = exp(x) * k1(x). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 4.9e-7 6.7e-8 + * See k1(). 
+ * + */ + + const float A[] = {-2.21338763073472585583E-8f, -2.43340614156596823496E-6f, + -1.73028895751305206302E-4f, -6.97572385963986435018E-3f, + -1.22611180822657148235E-1f, -3.53155960776544875667E-1f, + 1.52530022733894777053E0f}; + const float B[] = {2.01504975519703286596E-9f, -1.03457624656780970260E-8f, + 5.74108412545004946722E-8f, -3.50196060308781257119E-7f, + 2.40648494783721712015E-6f, -1.93619797416608296024E-5f, + 1.95215518471351631108E-4f, -2.85781685962277938680E-3f, + 1.03923736576817238437E-1f, 2.72062619048444266945E0f}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = pdiv(internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A), x); + x_le_two = pmadd( + generic_i1::run(x), plog(pmul(pset1(0.5), x)), x_le_two); + x_le_two = pmul(x_le_two, pexp(x)); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x)); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct generic_k1e { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k1e.c + * + * Modified Bessel function, third kind, order one, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * double x, y, k1e(); + * + * y = k1e( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of the third kind of order one of the argument: + * + * k1e(x) = exp(x) * k1(x). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 7.8e-16 1.2e-16 + * See k1(). 
+ * + */ + const double A[] = {-7.02386347938628759343E-18, -2.42744985051936593393E-15, + -6.66690169419932900609E-13, -1.41148839263352776110E-10, + -2.21338763073472585583E-8, -2.43340614156596823496E-6, + -1.73028895751305206302E-4, -6.97572385963986435018E-3, + -1.22611180822657148235E-1, -3.53155960776544875667E-1, + 1.52530022733894777053E0}; + const double B[] = {-5.75674448366501715755E-18, 1.79405087314755922667E-17, + -5.68946255844285935196E-17, 1.83809354436663880070E-16, + -6.05704724837331885336E-16, 2.03870316562433424052E-15, + -7.01983709041831346144E-15, 2.47715442448130437068E-14, + -8.97670518232499435011E-14, 3.34841966607842919884E-13, + -1.28917396095102890680E-12, 5.13963967348173025100E-12, + -2.12996783842756842877E-11, 9.21831518760500529508E-11, + -4.19035475934189648750E-10, 2.01504975519703286596E-9, + -1.03457624656780970260E-8, 5.74108412545004946722E-8, + -3.50196060308781257119E-7, 2.40648494783721712015E-6, + -1.93619797416608296024E-5, 1.95215518471351631108E-4, + -2.85781685962277938680E-3, 1.03923736576817238437E-1, + 2.72062619048444266945E0}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = pdiv(internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A), x); + x_le_two = pmadd( + generic_i1::run(x), plog(pmul(pset1(0.5), x)), x_le_two); + x_le_two = pmul(x_le_two, pexp(x)); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x)); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct k1e_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_k1e::run(x); + } +}; + +template +struct k1_retval { + typedef Scalar type; +}; + +template +struct generic_k1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); 
+ return ScalarType(0); + } +}; + +template +struct generic_k1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k1f.c + * Modified Bessel function, third kind, order one + * + * + * + * SYNOPSIS: + * + * float x, y, k1f(); + * + * y = k1f( x ); + * + * + * + * DESCRIPTION: + * + * Computes the modified Bessel function of the third kind + * of order one of the argument. + * + * The range is partitioned into the two intervals [0,2] and + * (2, infinity). Chebyshev polynomial expansions are employed + * in each interval. + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 4.6e-7 7.6e-8 + * + * ERROR MESSAGES: + * + * message condition value returned + * k1 domain x <= 0 MAXNUM + * + */ + + const float A[] = {-2.21338763073472585583E-8f, -2.43340614156596823496E-6f, + -1.73028895751305206302E-4f, -6.97572385963986435018E-3f, + -1.22611180822657148235E-1f, -3.53155960776544875667E-1f, + 1.52530022733894777053E0f}; + const float B[] = {2.01504975519703286596E-9f, -1.03457624656780970260E-8f, + 5.74108412545004946722E-8f, -3.50196060308781257119E-7f, + 2.40648494783721712015E-6f, -1.93619797416608296024E-5f, + 1.95215518471351631108E-4f, -2.85781685962277938680E-3f, + 1.03923736576817238437E-1f, 2.72062619048444266945E0f}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = pdiv(internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A), x); + x_le_two = pmadd( + generic_i1::run(x), plog(pmul(pset1(0.5), x)), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + pexp(-x), + pmul( + internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x))); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct generic_k1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* k1.c + * Modified Bessel function, third kind, order one + * + * + * + * SYNOPSIS: + 
* + * double x, y, k1(); + * + * y = k1( x ); + * + * + * + * DESCRIPTION: + * + * Computes the modified Bessel function of the third kind + * of order one of the argument. + * + * The range is partitioned into the two intervals [0,2] and + * (2, infinity). Chebyshev polynomial expansions are employed + * in each interval. + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 4.6e-7 7.6e-8 + * NOTE(review): same figures as the float k1f.c header; confirm for double. + * ERROR MESSAGES: + * + * message condition value returned + * k1 domain x <= 0 MAXNUM + * + */ + const double A[] = {-7.02386347938628759343E-18, -2.42744985051936593393E-15, + -6.66690169419932900609E-13, -1.41148839263352776110E-10, + -2.21338763073472585583E-8, -2.43340614156596823496E-6, + -1.73028895751305206302E-4, -6.97572385963986435018E-3, + -1.22611180822657148235E-1, -3.53155960776544875667E-1, + 1.52530022733894777053E0}; + const double B[] = {-5.75674448366501715755E-18, 1.79405087314755922667E-17, + -5.68946255844285935196E-17, 1.83809354436663880070E-16, + -6.05704724837331885336E-16, 2.03870316562433424052E-15, + -7.01983709041831346144E-15, 2.47715442448130437068E-14, + -8.97670518232499435011E-14, 3.34841966607842919884E-13, + -1.28917396095102890680E-12, 5.13963967348173025100E-12, + -2.12996783842756842877E-11, 9.21831518760500529508E-11, + -4.19035475934189648750E-10, 2.01504975519703286596E-9, + -1.03457624656780970260E-8, 5.74108412545004946722E-8, + -3.50196060308781257119E-7, 2.40648494783721712015E-6, + -1.93619797416608296024E-5, 1.95215518471351631108E-4, + -2.85781685962277938680E-3, 1.03923736576817238437E-1, + 2.72062619048444266945E0}; + const T MAXNUM = pset1(NumTraits::infinity()); + const T two = pset1(2.0); + T x_le_two = pdiv(internal::pchebevl::run( + pmadd(x, x, pset1(-2.0)), A), x); + x_le_two = pmadd( + generic_i1::run(x), plog(pmul(pset1(0.5), x)), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), MAXNUM, x_le_two); + T x_gt_two = pmul( + pexp(-x), + pmul( +
internal::pchebevl::run( + psub(pdiv(pset1(8.0), x), two), B), + prsqrt(x))); + return pselect(pcmp_le(x, two), x_le_two, x_gt_two); + } +}; + +template +struct k1_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_k1::run(x); + } +}; + +template +struct j0_retval { + typedef Scalar type; +}; + +template +struct generic_j0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_j0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j0f.c + * Bessel function of order zero + * + * + * + * SYNOPSIS: + * + * float x, y, j0f(); + * + * y = j0f( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of order zero of the argument. + * + * The domain is divided into the intervals [0, 2] and + * (2, infinity). In the first interval the following polynomial + * approximation is used: + * + * + * 2 2 2 + * (w - r ) (w - r ) (w - r ) P(w) + * 1 2 3 + * + * 2 + * where w = x and the three r's are zeros of the function. + * + * In the second interval, the modulus and phase are approximated + * by polynomials of the form Modulus(x) = sqrt(1/x) Q(1/x) + * and Phase(x) = x + 1/x R(1/x^2) - pi/4. The function is + * + * j0(x) = Modulus(x) cos( Phase(x) ). 
+ * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * IEEE 0, 2 100000 1.3e-7 3.6e-8 + * IEEE 2, 32 100000 1.9e-7 5.4e-8 + * + */ + + const float JP[] = {-6.068350350393235E-008f, 6.388945720783375E-006f, + -3.969646342510940E-004f, 1.332913422519003E-002f, + -1.729150680240724E-001f}; + const float MO[] = {-6.838999669318810E-002f, 1.864949361379502E-001f, + -2.145007480346739E-001f, 1.197549369473540E-001f, + -3.560281861530129E-003f, -4.969382655296620E-002f, + -3.355424622293709E-006f, 7.978845717621440E-001f}; + const float PH[] = {3.242077816988247E+001f, -3.630592630518434E+001f, + 1.756221482109099E+001f, -4.974978466280903E+000f, + 1.001973420681837E+000f, -1.939906941791308E-001f, + 6.490598792654666E-002f, -1.249992184872738E-001f}; + const T DR1 = pset1(5.78318596294678452118f); + const T NEG_PIO4F = pset1(-0.7853981633974483096f); /* -pi / 4 */ + T y = pabs(x); + T z = pmul(y, y); + T y_le_two = pselect( + pcmp_lt(y, pset1(1.0e-3f)), + pmadd(z, pset1(-0.25f), pset1(1.0f)), + pmul(psub(z, DR1), internal::ppolevl::run(z, JP))); + T q = pdiv(pset1(1.0f), y); + T w = prsqrt(y); + T p = pmul(w, internal::ppolevl::run(q, MO)); + w = pmul(q, q); + T yn = pmadd(q, internal::ppolevl::run(w, PH), NEG_PIO4F); + T y_gt_two = pmul(p, pcos(padd(yn, y))); + return pselect(pcmp_le(y, pset1(2.0)), y_le_two, y_gt_two); + } +}; + +template +struct generic_j0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j0.c + * Bessel function of order zero + * + * + * + * SYNOPSIS: + * + * double x, y, j0(); + * + * y = j0( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of order zero of the argument. + * + * The domain is divided into the intervals [0, 5] and + * (5, infinity). In the first interval the following rational + * approximation is used: + * + * + * 2 2 + * (w - r ) (w - r ) P (w) / Q (w) + * 1 2 3 8 + * + * 2 + * where w = x and the two r's are zeros of the function. 
+ * + * In the second interval, the Hankel asymptotic expansion + * is employed with two rational functions of degree 6/6 + * and 7/7. + * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * DEC 0, 30 10000 4.4e-17 6.3e-18 + * IEEE 0, 30 60000 4.2e-16 1.1e-16 + * + */ + const double PP[] = {7.96936729297347051624E-4, 8.28352392107440799803E-2, + 1.23953371646414299388E0, 5.44725003058768775090E0, + 8.74716500199817011941E0, 5.30324038235394892183E0, + 9.99999999999999997821E-1}; + const double PQ[] = {9.24408810558863637013E-4, 8.56288474354474431428E-2, + 1.25352743901058953537E0, 5.47097740330417105182E0, + 8.76190883237069594232E0, 5.30605288235394617618E0, + 1.00000000000000000218E0}; + const double QP[] = {-1.13663838898469149931E-2, -1.28252718670509318512E0, + -1.95539544257735972385E1, -9.32060152123768231369E1, + -1.77681167980488050595E2, -1.47077505154951170175E2, + -5.14105326766599330220E1, -6.05014350600728481186E0}; + const double QQ[] = {1.00000000000000000000E0, 6.43178256118178023184E1, + 8.56430025976980587198E2, 3.88240183605401609683E3, + 7.24046774195652478189E3, 5.93072701187316984827E3, + 2.06209331660327847417E3, 2.42005740240291393179E2}; + const double RP[] = {-4.79443220978201773821E9, 1.95617491946556577543E12, + -2.49248344360967716204E14, 9.70862251047306323952E15}; + const double RQ[] = {1.00000000000000000000E0, 4.99563147152651017219E2, + 1.73785401676374683123E5, 4.84409658339962045305E7, + 1.11855537045356834862E10, 2.11277520115489217587E12, + 3.10518229857422583814E14, 3.18121955943204943306E16, + 1.71086294081043136091E18}; + const T DR1 = pset1(5.78318596294678452118E0); + const T DR2 = pset1(3.04712623436620863991E1); + const T SQ2OPI = pset1(7.9788456080286535587989E-1); /* sqrt(2 / pi) */ + const T NEG_PIO4 = pset1(-0.7853981633974483096); /* -pi / 4 */ + + T y = pabs(x); + T z = pmul(y, y); + T y_le_five = pselect( + pcmp_lt(y, pset1(1.0e-5)), + pmadd(z, pset1(-0.25), pset1(1.0)), +
pmul(pmul(psub(z, DR1), psub(z, DR2)), + pdiv(internal::ppolevl::run(z, RP), + internal::ppolevl::run(z, RQ)))); + T s = pdiv(pset1(25.0), z); + T p = pdiv( + internal::ppolevl::run(s, PP), + internal::ppolevl::run(s, PQ)); + T q = pdiv( + internal::ppolevl::run(s, QP), + internal::ppolevl::run(s, QQ)); + T yn = padd(y, NEG_PIO4); + T w = pdiv(pset1(-5.0), y); + p = pmadd(p, pcos(yn), pmul(w, pmul(q, psin(yn)))); + T y_gt_five = pmul(p, pmul(SQ2OPI, prsqrt(y))); + return pselect(pcmp_le(y, pset1(5.0)), y_le_five, y_gt_five); + } +}; + +template +struct j0_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_j0::run(x); + } +}; + +template +struct y0_retval { + typedef Scalar type; +}; + +template +struct generic_y0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_y0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j0f.c + * Bessel function of the second kind, order zero + * + * + * + * SYNOPSIS: + * + * float x, y, y0f(); + * + * y = y0f( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of the second kind, of order + * zero, of the argument. + * + * The domain is divided into the intervals [0, 2] and + * (2, infinity). In the first interval a rational approximation + * R(x) is employed to compute + * + * 2 2 2 + * y0(x) = (w - r ) (w - r ) (w - r ) R(x) + 2/pi ln(x) j0(x). + * 1 2 3 + * + * Thus a call to j0() is required. The three zeros are removed + * from R(x) to improve its numerical stability. + * + * In the second interval, the modulus and phase are approximated + * by polynomials of the form Modulus(x) = sqrt(1/x) Q(1/x) + * and Phase(x) = x + 1/x S(1/x^2) - pi/4. Then the function is + * + * y0(x) = Modulus(x) sin( Phase(x) ). 
+ * + * + * + * + * ACCURACY: + * + * Absolute error, when y0(x) < 1; else relative error: + * + * arithmetic domain # trials peak rms + * IEEE 0, 2 100000 2.4e-7 3.4e-8 + * IEEE 2, 32 100000 1.8e-7 5.3e-8 + * + */ + + const float YP[] = {9.454583683980369E-008f, -9.413212653797057E-006f, + 5.344486707214273E-004f, -1.584289289821316E-002f, + 1.707584643733568E-001f}; + const float MO[] = {-6.838999669318810E-002f, 1.864949361379502E-001f, + -2.145007480346739E-001f, 1.197549369473540E-001f, + -3.560281861530129E-003f, -4.969382655296620E-002f, + -3.355424622293709E-006f, 7.978845717621440E-001f}; + const float PH[] = {3.242077816988247E+001f, -3.630592630518434E+001f, + 1.756221482109099E+001f, -4.974978466280903E+000f, + 1.001973420681837E+000f, -1.939906941791308E-001f, + 6.490598792654666E-002f, -1.249992184872738E-001f}; + const T YZ1 = pset1(0.43221455686510834878f); + const T TWOOPI = pset1(0.636619772367581343075535f); /* 2 / pi */ + const T NEG_PIO4F = pset1(-0.7853981633974483096f); /* -pi / 4 */ + const T NEG_MAXNUM = pset1(-NumTraits::infinity()); + T z = pmul(x, x); + T x_le_two = pmul(TWOOPI, pmul(plog(x), generic_j0::run(x))); + x_le_two = pmadd( + psub(z, YZ1), internal::ppolevl::run(z, YP), x_le_two); + x_le_two = pselect(pcmp_le(x, pset1(0.0)), NEG_MAXNUM, x_le_two); + T q = pdiv(pset1(1.0), x); + T w = prsqrt(x); + T p = pmul(w, internal::ppolevl::run(q, MO)); + T u = pmul(q, q); + T xn = pmadd(q, internal::ppolevl::run(u, PH), NEG_PIO4F); + T x_gt_two = pmul(p, psin(padd(xn, x))); + return pselect(pcmp_le(x, pset1(2.0)), x_le_two, x_gt_two); + } +}; + +template +struct generic_y0 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j0.c + * Bessel function of the second kind, order zero + * + * + * + * SYNOPSIS: + * + * double x, y, y0(); + * + * y = y0( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of the second kind, of order + * zero, of the argument. 
+ * + * The domain is divided into the intervals [0, 5] and + * (5, infinity). In the first interval a rational approximation + * R(x) is employed to compute + * y0(x) = R(x) + 2 * log(x) * j0(x) / PI. + * Thus a call to j0() is required. + * + * In the second interval, the Hankel asymptotic expansion + * is employed with two rational functions of degree 6/6 + * and 7/7. + * + * + * + * ACCURACY: + * + * Absolute error, when y0(x) < 1; else relative error: + * + * arithmetic domain # trials peak rms + * DEC 0, 30 9400 7.0e-17 7.9e-18 + * IEEE 0, 30 30000 1.3e-15 1.6e-16 + * + */ + const double PP[] = {7.96936729297347051624E-4, 8.28352392107440799803E-2, + 1.23953371646414299388E0, 5.44725003058768775090E0, + 8.74716500199817011941E0, 5.30324038235394892183E0, + 9.99999999999999997821E-1}; + const double PQ[] = {9.24408810558863637013E-4, 8.56288474354474431428E-2, + 1.25352743901058953537E0, 5.47097740330417105182E0, + 8.76190883237069594232E0, 5.30605288235394617618E0, + 1.00000000000000000218E0}; + const double QP[] = {-1.13663838898469149931E-2, -1.28252718670509318512E0, + -1.95539544257735972385E1, -9.32060152123768231369E1, + -1.77681167980488050595E2, -1.47077505154951170175E2, + -5.14105326766599330220E1, -6.05014350600728481186E0}; + const double QQ[] = {1.00000000000000000000E0, 6.43178256118178023184E1, + 8.56430025976980587198E2, 3.88240183605401609683E3, + 7.24046774195652478189E3, 5.93072701187316984827E3, + 2.06209331660327847417E3, 2.42005740240291393179E2}; + const double YP[] = {1.55924367855235737965E4, -1.46639295903971606143E7, + 5.43526477051876500413E9, -9.82136065717911466409E11, + 8.75906394395366999549E13, -3.46628303384729719441E15, + 4.42733268572569800351E16, -1.84950800436986690637E16}; + const double YQ[] = {1.00000000000000000000E0, 1.04128353664259848412E3, + 6.26107330137134956842E5, 2.68919633393814121987E8, + 8.64002487103935000337E10, 2.02979612750105546709E13, + 3.17157752842975028269E15, 2.50596256172653059228E17}; + const T 
SQ2OPI = pset1(7.9788456080286535587989E-1); /* sqrt(2 / pi) */ + const T TWOOPI = pset1(0.636619772367581343075535); /* 2 / pi */ + const T NEG_PIO4 = pset1(-0.7853981633974483096); /* -pi / 4 */ + const T NEG_MAXNUM = pset1(-NumTraits::infinity()); + + T z = pmul(x, x); + T x_le_five = pdiv(internal::ppolevl::run(z, YP), + internal::ppolevl::run(z, YQ)); + x_le_five = pmadd( + pmul(TWOOPI, plog(x)), generic_j0::run(x), x_le_five); + x_le_five = pselect(pcmp_le(x, pset1(0.0)), NEG_MAXNUM, x_le_five); + T s = pdiv(pset1(25.0), z); + T p = pdiv( + internal::ppolevl::run(s, PP), + internal::ppolevl::run(s, PQ)); + T q = pdiv( + internal::ppolevl::run(s, QP), + internal::ppolevl::run(s, QQ)); + T xn = padd(x, NEG_PIO4); + T w = pdiv(pset1(5.0), x); + p = pmadd(p, psin(xn), pmul(w, pmul(q, pcos(xn)))); + T x_gt_five = pmul(p, pmul(SQ2OPI, prsqrt(x))); + return pselect(pcmp_le(x, pset1(5.0)), x_le_five, x_gt_five); + } +}; + +template +struct y0_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_y0::run(x); + } +}; + +template +struct j1_retval { + typedef Scalar type; +}; + +template +struct generic_j1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_j1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j1f.c + * Bessel function of order one + * + * + * + * SYNOPSIS: + * + * float x, y, j1f(); + * + * y = j1f( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of order one of the argument. + * + * The domain is divided into the intervals [0, 2] and + * (2, infinity). In the first interval a polynomial approximation + * 2 + * (w - r ) x P(w) + * 1 + * 2 + * is used, where w = x and r is the first zero of the function. 
+ * + * In the second interval, the modulus and phase are approximated + * by polynomials of the form Modulus(x) = sqrt(1/x) Q(1/x) + * and Phase(x) = x + 1/x R(1/x^2) - 3pi/4. The function is + * + * j0(x) = Modulus(x) cos( Phase(x) ). + * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * IEEE 0, 2 100000 1.2e-7 2.5e-8 + * IEEE 2, 32 100000 2.0e-7 5.3e-8 + * + * + */ + + const float JP[] = {-4.878788132172128E-009f, 6.009061827883699E-007f, + -4.541343896997497E-005f, 1.937383947804541E-003f, + -3.405537384615824E-002f}; + const float MO1[] = {6.913942741265801E-002f, -2.284801500053359E-001f, + 3.138238455499697E-001f, -2.102302420403875E-001f, + 5.435364690523026E-003f, 1.493389585089498E-001f, + 4.976029650847191E-006f, 7.978845453073848E-001f}; + const float PH1[] = {-4.497014141919556E+001f, 5.073465654089319E+001f, + -2.485774108720340E+001f, 7.222973196770240E+000f, + -1.544842782180211E+000f, 3.503787691653334E-001f, + -1.637986776941202E-001f, 3.749989509080821E-001f}; + const T Z1 = pset1(1.46819706421238932572E1f); + const T NEG_THPIO4F = pset1(-2.35619449019234492885f); /* -3*pi/4 */ + + T y = pabs(x); + T z = pmul(y, y); + T y_le_two = pmul( + psub(z, Z1), + pmul(x, internal::ppolevl::run(z, JP))); + T q = pdiv(pset1(1.0f), y); + T w = prsqrt(y); + T p = pmul(w, internal::ppolevl::run(q, MO1)); + w = pmul(q, q); + T yn = pmadd(q, internal::ppolevl::run(w, PH1), NEG_THPIO4F); + T y_gt_two = pmul(p, pcos(padd(yn, y))); + // j1 is an odd function. This implementation differs from cephes to + // take this fact in to account. Cephes returns -j1(x) for y > 2 range. 
+ y_gt_two = pselect( + pcmp_lt(x, pset1(0.0f)), pmul(pset1(-1.0f), y_gt_two), y_gt_two); + return pselect(pcmp_le(y, pset1(2.0f)), y_le_two, y_gt_two); + } +}; + +template +struct generic_j1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j1.c + * Bessel function of order one + * + * + * + * SYNOPSIS: + * + * double x, y, j1(); + * + * y = j1( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of order one of the argument. + * + * The domain is divided into the intervals [0, 8] and + * (8, infinity). In the first interval a 24 term Chebyshev + * expansion is used. In the second, the asymptotic + * trigonometric representation is employed using two + * rational functions of degree 5/5. + * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * DEC 0, 30 10000 4.0e-17 1.1e-17 + * IEEE 0, 30 30000 2.6e-16 1.1e-16 + * + */ + const double PP[] = {7.62125616208173112003E-4, 7.31397056940917570436E-2, + 1.12719608129684925192E0, 5.11207951146807644818E0, + 8.42404590141772420927E0, 5.21451598682361504063E0, + 1.00000000000000000254E0}; + const double PQ[] = {5.71323128072548699714E-4, 6.88455908754495404082E-2, + 1.10514232634061696926E0, 5.07386386128601488557E0, + 8.39985554327604159757E0, 5.20982848682361821619E0, + 9.99999999999999997461E-1}; + const double QP[] = {5.10862594750176621635E-2, 4.98213872951233449420E0, + 7.58238284132545283818E1, 3.66779609360150777800E2, + 7.10856304998926107277E2, 5.97489612400613639965E2, + 2.11688757100572135698E2, 2.52070205858023719784E1}; + const double QQ[] = {1.00000000000000000000E0, 7.42373277035675149943E1, + 1.05644886038262816351E3, 4.98641058337653607651E3, + 9.56231892404756170795E3, 7.99704160447350683650E3, + 2.82619278517639096600E3, 3.36093607810698293419E2}; + const double RP[] = {-8.99971225705559398224E8, 4.52228297998194034323E11, + -7.27494245221818276015E13, 3.68295732863852883286E15}; + const double RQ[] = {1.00000000000000000000E0, 
6.20836478118054335476E2, + 2.56987256757748830383E5, 8.35146791431949253037E7, + 2.21511595479792499675E10, 4.74914122079991414898E12, + 7.84369607876235854894E14, 8.95222336184627338078E16, + 5.32278620332680085395E18}; + const T Z1 = pset1(1.46819706421238932572E1); + const T Z2 = pset1(4.92184563216946036703E1); + const T NEG_THPIO4 = pset1(-2.35619449019234492885); /* -3*pi/4 */ + const T SQ2OPI = pset1(7.9788456080286535587989E-1); /* sqrt(2 / pi) */ + T y = pabs(x); + T z = pmul(y, y); + T y_le_five = pdiv(internal::ppolevl::run(z, RP), + internal::ppolevl::run(z, RQ)); + y_le_five = pmul(pmul(pmul(y_le_five, x), psub(z, Z1)), psub(z, Z2)); + T s = pdiv(pset1(25.0), z); + T p = pdiv( + internal::ppolevl::run(s, PP), + internal::ppolevl::run(s, PQ)); + T q = pdiv( + internal::ppolevl::run(s, QP), + internal::ppolevl::run(s, QQ)); + T yn = padd(y, NEG_THPIO4); + T w = pdiv(pset1(-5.0), y); + p = pmadd(p, pcos(yn), pmul(w, pmul(q, psin(yn)))); + T y_gt_five = pmul(p, pmul(SQ2OPI, prsqrt(y))); + // j1 is an odd function. This implementation differs from cephes to + // take this fact in to account. Cephes returns -j1(x) for y > 5 range. 
+ y_gt_five = pselect( + pcmp_lt(x, pset1(0.0f)), pmul(pset1(-1.0), y_gt_five), y_gt_five); + return pselect(pcmp_le(y, pset1(5.0)), y_le_five, y_gt_five); + } +}; + +template +struct j1_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_j1::run(x); + } +}; + +template +struct y1_retval { + typedef Scalar type; +}; + +template +struct generic_y1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T&) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return ScalarType(0); + } +}; + +template +struct generic_y1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j1f.c + * Bessel function of second kind of order one + * + * + * + * SYNOPSIS: + * + * double x, y, y1(); + * + * y = y1( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of the second kind of order one + * of the argument. + * + * The domain is divided into the intervals [0, 2] and + * (2, infinity). In the first interval a rational approximation + * R(x) is employed to compute + * + * 2 + * y0(x) = (w - r ) x R(x^2) + 2/pi (ln(x) j1(x) - 1/x) . + * 1 + * + * Thus a call to j1() is required. + * + * In the second interval, the modulus and phase are approximated + * by polynomials of the form Modulus(x) = sqrt(1/x) Q(1/x) + * and Phase(x) = x + 1/x S(1/x^2) - 3pi/4. Then the function is + * + * y0(x) = Modulus(x) sin( Phase(x) ). + * + * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * IEEE 0, 2 100000 2.2e-7 4.6e-8 + * IEEE 2, 32 100000 1.9e-7 5.3e-8 + * + * (error criterion relative when |y1| > 1). 
+ * + */ + + const float YP[] = {8.061978323326852E-009f, -9.496460629917016E-007f, + 6.719543806674249E-005f, -2.641785726447862E-003f, + 4.202369946500099E-002f}; + const float MO1[] = {6.913942741265801E-002f, -2.284801500053359E-001f, + 3.138238455499697E-001f, -2.102302420403875E-001f, + 5.435364690523026E-003f, 1.493389585089498E-001f, + 4.976029650847191E-006f, 7.978845453073848E-001f}; + const float PH1[] = {-4.497014141919556E+001f, 5.073465654089319E+001f, + -2.485774108720340E+001f, 7.222973196770240E+000f, + -1.544842782180211E+000f, 3.503787691653334E-001f, + -1.637986776941202E-001f, 3.749989509080821E-001f}; + const T YO1 = pset1(4.66539330185668857532f); + const T NEG_THPIO4F = pset1(-2.35619449019234492885f); /* -3*pi/4 */ + const T TWOOPI = pset1(0.636619772367581343075535f); /* 2/pi */ + const T NEG_MAXNUM = pset1(-NumTraits::infinity()); + + T z = pmul(x, x); + T x_le_two = pmul(psub(z, YO1), internal::ppolevl::run(z, YP)); + x_le_two = pmadd( + x_le_two, x, + pmul(TWOOPI, pmadd( + generic_j1::run(x), plog(x), + pdiv(pset1(-1.0f), x)))); + x_le_two = pselect(pcmp_le(x, pset1(0.0f)), NEG_MAXNUM, x_le_two); /* x <= 0 (not x < 0): at x == 0 the expression above evaluates j1(0) * log(0) = 0 * -inf = NaN, so the pole must be mapped to -infinity here, as the double specialization and generic_y0 already do */ + + T q = pdiv(pset1(1.0f), x); + T w = prsqrt(x); + T p = pmul(w, internal::ppolevl::run(q, MO1)); + w = pmul(q, q); + T xn = pmadd(q, internal::ppolevl::run(w, PH1), NEG_THPIO4F); + T x_gt_two = pmul(p, psin(padd(xn, x))); + return pselect(pcmp_le(x, pset1(2.0f)), x_le_two, x_gt_two); + } +}; + +template +struct generic_y1 { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE T run(const T& x) { + /* j1.c + * Bessel function of second kind of order one + * + * + * + * SYNOPSIS: + * + * double x, y, y1(); + * + * y = y1( x ); + * + * + * + * DESCRIPTION: + * + * Returns Bessel function of the second kind of order one + * of the argument. + * + * The domain is divided into the intervals [0, 8] and + * (8, infinity). In the first interval a 25 term Chebyshev + * expansion is used, and a call to j1() is required.
+ * In the second, the asymptotic trigonometric representation + * is employed using two rational functions of degree 5/5. + * + * + * + * ACCURACY: + * + * Absolute error: + * arithmetic domain # trials peak rms + * DEC 0, 30 10000 8.6e-17 1.3e-17 + * IEEE 0, 30 30000 1.0e-15 1.3e-16 + * + * (error criterion relative when |y1| > 1). + * + */ + const double PP[] = {7.62125616208173112003E-4, 7.31397056940917570436E-2, + 1.12719608129684925192E0, 5.11207951146807644818E0, + 8.42404590141772420927E0, 5.21451598682361504063E0, + 1.00000000000000000254E0}; + const double PQ[] = {5.71323128072548699714E-4, 6.88455908754495404082E-2, + 1.10514232634061696926E0, 5.07386386128601488557E0, + 8.39985554327604159757E0, 5.20982848682361821619E0, + 9.99999999999999997461E-1}; + const double QP[] = {5.10862594750176621635E-2, 4.98213872951233449420E0, + 7.58238284132545283818E1, 3.66779609360150777800E2, + 7.10856304998926107277E2, 5.97489612400613639965E2, + 2.11688757100572135698E2, 2.52070205858023719784E1}; + const double QQ[] = {1.00000000000000000000E0, 7.42373277035675149943E1, + 1.05644886038262816351E3, 4.98641058337653607651E3, + 9.56231892404756170795E3, 7.99704160447350683650E3, + 2.82619278517639096600E3, 3.36093607810698293419E2}; + const double YP[] = {1.26320474790178026440E9, -6.47355876379160291031E11, + 1.14509511541823727583E14, -8.12770255501325109621E15, + 2.02439475713594898196E17, -7.78877196265950026825E17}; + const double YQ[] = {1.00000000000000000000E0, 5.94301592346128195359E2, + 2.35564092943068577943E5, 7.34811944459721705660E7, + 1.87601316108706159478E10, 3.88231277496238566008E12, + 6.20557727146953693363E14, 6.87141087355300489866E16, + 3.97270608116560655612E18}; + const T SQ2OPI = pset1(.79788456080286535588); + const T NEG_THPIO4 = pset1(-2.35619449019234492885); /* -3*pi/4 */ + const T TWOOPI = pset1(0.636619772367581343075535); /* 2/pi */ + const T NEG_MAXNUM = pset1(-NumTraits::infinity()); + + T z = pmul(x, x); + T x_le_five = 
pdiv(internal::ppolevl::run(z, YP), + internal::ppolevl::run(z, YQ)); + x_le_five = pmadd( + x_le_five, x, pmul( + TWOOPI, pmadd(generic_j1::run(x), plog(x), + pdiv(pset1(-1.0), x)))); + + x_le_five = pselect(pcmp_le(x, pset1(0.0)), NEG_MAXNUM, x_le_five); + T s = pdiv(pset1(25.0), z); + T p = pdiv( + internal::ppolevl::run(s, PP), + internal::ppolevl::run(s, PQ)); + T q = pdiv( + internal::ppolevl::run(s, QP), + internal::ppolevl::run(s, QQ)); + T xn = padd(x, NEG_THPIO4); + T w = pdiv(pset1(5.0), x); + p = pmadd(p, psin(xn), pmul(w, pmul(q, pcos(xn)))); + T x_gt_five = pmul(p, pmul(SQ2OPI, prsqrt(x))); + return pselect(pcmp_le(x, pset1(5.0)), x_le_five, x_gt_five); + } +}; + +template +struct y1_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_y1::run(x); + } +}; + +} // end namespace internal + +namespace numext { + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i0, Scalar) + i0(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(i0, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i0e, Scalar) + i0e(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(i0e, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i1, Scalar) + i1(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(i1, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i1e, Scalar) + i1e(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(i1e, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(k0, Scalar) + k0(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(k0, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(k0e, Scalar) + k0e(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(k0e, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(k1, Scalar) + k1(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(k1, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(k1e, 
Scalar) + k1e(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(k1e, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(j0, Scalar) + j0(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(j0, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(y0, Scalar) + y0(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(y0, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(j1, Scalar) + j1(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(j1, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(y1, Scalar) + y1(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(y1, Scalar)::run(x); +} + +} // end namespace numext + +} // end namespace Eigen + +#endif // EIGEN_BESSEL_FUNCTIONS_H diff --git a/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h new file mode 100644 index 000000000..70eaad5cd --- /dev/null +++ b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h @@ -0,0 +1,130 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_BESSELFUNCTIONS_PACKETMATH_H +#define EIGEN_BESSELFUNCTIONS_PACKETMATH_H + +namespace Eigen { + +namespace internal { + +/** \internal \returns the modified Bessel function of the first kind of + * order zero i0(\a x) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pi0(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_i0; return generic_i0::run(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * the first kind of order zero i0e(\a x) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pi0e(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_i0e; return generic_i0e::run(x); +} + +/** \internal \returns the modified Bessel function of the first kind of + * order one i1(\a x) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pi1(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_i1; return generic_i1::run(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * the first kind of order one i1e(\a x) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pi1e(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_i1e; return generic_i1e::run(x); +} + +/** \internal \returns the Bessel function of the first kind of + * order zero j0(\a x) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pj0(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_j0; return generic_j0::run(x); +} + +/** \internal \returns the Bessel function of the first kind of + * order one j1(\a x) (coeff-wise) */ +template
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pj1(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_j1; return generic_j1::run(x); +} + +/** \internal \returns the Bessel function of the second kind of + * order zero y0(\a x) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet py0(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_y0; return generic_y0::run(x); +} + +/** \internal \returns the Bessel function of the second kind of + * order one y1(\a x) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet py1(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_y1; return generic_y1::run(x); +} + +/** \internal \returns the modified Bessel function of the second kind of + * order zero k0(\a x) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pk0(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_k0; return generic_k0::run(x); +} + +/** \internal \returns the exponentially scaled modified Bessel function of + * the second kind of order zero k0e(\a x) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pk0e(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_k0e; return generic_k0e::run(x); +} + +/** \internal \returns the modified Bessel function of the second kind of + * order one k1(\a x) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pk1(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_k1; return generic_k1::run(x); +} + +/** \internal \returns the exponentially scaled
modified Bessel function of + * order one k1e(\a a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pk1e(const Packet& x) { + typedef typename unpacket_traits::type ScalarType; + using internal::generic_k1e; return generic_k1e::run(x); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BESSELFUNCTIONS_PACKETMATH_H + diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h index 617401e9d..691ff4d03 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h @@ -161,51 +161,6 @@ zeta(const Eigen::ArrayBase& x, const Eigen::ArrayBase& q) ); } -/** \returns an expression of the coefficient-wise i0e(\a x) to the given - * arrays. - * - * It returns the exponentially scaled modified Bessel - * function of order zero. - * - * \param x is the argument - * - * \note This function supports only float and double scalar types. To support - * other scalar types, the user has to provide implementations of i0e(T) for - * any scalar type T to be supported. - * - * \sa ArrayBase::i0e() - */ -template -EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< - Eigen::internal::scalar_i0e_op, const Derived> -i0e(const Eigen::ArrayBase& x) { - return Eigen::CwiseUnaryOp< - Eigen::internal::scalar_i0e_op, - const Derived>(x.derived()); -} - -/** \returns an expression of the coefficient-wise i1e(\a x) to the given - * arrays. - * - * It returns the exponentially scaled modified Bessel - * function of order one. - * - * \param x is the argument - * - * \note This function supports only float and double scalar types. To support - * other scalar types, the user has to provide implementations of i1e(T) for - * any scalar type T to be supported. 
- * - * \sa ArrayBase::i1e() - */ -template -EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< - Eigen::internal::scalar_i1e_op, const Derived> -i1e(const Eigen::ArrayBase& x) { - return Eigen::CwiseUnaryOp< - Eigen::internal::scalar_i1e_op, - const Derived>(x.derived()); -} } // end namespace Eigen diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h index 13a72a3ee..a4287c31f 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h @@ -308,60 +308,6 @@ struct functor_traits > }; }; -/** \internal - * \brief Template functor to compute the exponentially scaled modified Bessel - * function of order zero - * \sa class CwiseUnaryOp, Cwise::i0e() - */ -template -struct scalar_i0e_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_i0e_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { - using numext::i0e; - return i0e(x); - } - typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { - return internal::pi0e(x); - } -}; -template -struct functor_traits > { - enum { - // On average, a Chebyshev polynomial of order N=20 is computed. - // The cost is N multiplications and 2N additions. 
- Cost = 20 * NumTraits::MulCost + 40 * NumTraits::AddCost, - PacketAccess = packet_traits::HasI0e - }; -}; - -/** \internal - * \brief Template functor to compute the exponentially scaled modified Bessel - * function of order zero - * \sa class CwiseUnaryOp, Cwise::i1e() - */ -template -struct scalar_i1e_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_i1e_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { - using numext::i1e; - return i1e(x); - } - typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { - return internal::pi1e(x); - } -}; -template -struct functor_traits > { - enum { - // On average, a Chebyshev polynomial of order N=20 is computed. - // The cost is N multiplications and 2N additions. - Cost = 20 * NumTraits::MulCost + 40 * NumTraits::AddCost, - PacketAccess = packet_traits::HasI1e - }; -}; - } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h index 538db2afa..2a3a53168 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h @@ -50,14 +50,6 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half betainc(const Eigen::half& a, const Eigen::half& b, const Eigen::half& x) { return Eigen::half(Eigen::numext::betainc(static_cast(a), static_cast(b), static_cast(x))); } -template <> -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i0e(const Eigen::half& x) { - return Eigen::half(Eigen::numext::i0e(static_cast(x))); -} -template <> -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i1e(const Eigen::half& x) { - return Eigen::half(Eigen::numext::i1e(static_cast(x))); -} #endif } // end namespace numext diff --git 
a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index 7c6d32049..ea00bd96e 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -1757,7 +1757,7 @@ struct betainc_helper { if ((a + b) < maxgam && numext::abs(u) < maxlog) { t = gamma(a + b) / (gamma(a) * gamma(b)); s = s * t * pow(x, a); - } else { + } */ t = lgamma_impl::run(a + b) - lgamma_impl::run(a) - lgamma_impl::run(b) + u + numext::log(s); @@ -1864,351 +1864,6 @@ struct betainc_impl { #endif // EIGEN_HAS_C99_MATH -/**************************************************************************** - * Implementation of Bessel function, based on Cephes * - ****************************************************************************/ - -template -struct i0e_retval { - typedef Scalar type; -}; - -template -struct generic_i0e { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE T run(const T&) { - EIGEN_STATIC_ASSERT((internal::is_same::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return ScalarType(0); - } -}; - -template -struct generic_i0e { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE T run(const T& x) { - /* i0ef.c - * - * Modified Bessel function of order zero, - * exponentially scaled - * - * - * - * SYNOPSIS: - * - * float x, y, i0ef(); - * - * y = i0ef( x ); - * - * - * - * DESCRIPTION: - * - * Returns exponentially scaled modified Bessel function - * of order zero of the argument. - * - * The function is defined as i0e(x) = exp(-|x|) j0( ix ). - * - * - * - * ACCURACY: - * - * Relative error: - * arithmetic domain # trials peak rms - * IEEE 0,30 100000 3.7e-7 7.0e-8 - * See i0f(). 
- * - */ - - const float A[] = {-1.30002500998624804212E-8f, 6.04699502254191894932E-8f, - -2.67079385394061173391E-7f, 1.11738753912010371815E-6f, - -4.41673835845875056359E-6f, 1.64484480707288970893E-5f, - -5.75419501008210370398E-5f, 1.88502885095841655729E-4f, - -5.76375574538582365885E-4f, 1.63947561694133579842E-3f, - -4.32430999505057594430E-3f, 1.05464603945949983183E-2f, - -2.37374148058994688156E-2f, 4.93052842396707084878E-2f, - -9.49010970480476444210E-2f, 1.71620901522208775349E-1f, - -3.04682672343198398683E-1f, 6.76795274409476084995E-1f}; - - const float B[] = {3.39623202570838634515E-9f, 2.26666899049817806459E-8f, - 2.04891858946906374183E-7f, 2.89137052083475648297E-6f, - 6.88975834691682398426E-5f, 3.36911647825569408990E-3f, - 8.04490411014108831608E-1f}; - T y = pabs(x); - T y_le_eight = internal::pchebevl::run( - pmadd(pset1(0.5f), y, pset1(-2.0f)), A); - T y_gt_eight = pdiv( - internal::pchebevl::run( - psub(pdiv(pset1(32.0f), y), pset1(2.0f)), B), - psqrt(y)); - // TODO: Perhaps instead check whether all packet elements are in - // [-8, 8] and evaluate a branch based off of that. It's possible - // in practice most elements are in this region. - return pselect(pcmp_le(y, pset1(8.0f)), y_le_eight, y_gt_eight); - } -}; - -template -struct generic_i0e { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE T run(const T& x) { - /* i0e.c - * - * Modified Bessel function of order zero, - * exponentially scaled - * - * - * - * SYNOPSIS: - * - * double x, y, i0e(); - * - * y = i0e( x ); - * - * - * - * DESCRIPTION: - * - * Returns exponentially scaled modified Bessel function - * of order zero of the argument. - * - * The function is defined as i0e(x) = exp(-|x|) j0( ix ). - * - * - * - * ACCURACY: - * - * Relative error: - * arithmetic domain # trials peak rms - * IEEE 0,30 30000 5.4e-16 1.2e-16 - * See i0(). 
- * - */ - - const double A[] = {-4.41534164647933937950E-18, 3.33079451882223809783E-17, - -2.43127984654795469359E-16, 1.71539128555513303061E-15, - -1.16853328779934516808E-14, 7.67618549860493561688E-14, - -4.85644678311192946090E-13, 2.95505266312963983461E-12, - -1.72682629144155570723E-11, 9.67580903537323691224E-11, - -5.18979560163526290666E-10, 2.65982372468238665035E-9, - -1.30002500998624804212E-8, 6.04699502254191894932E-8, - -2.67079385394061173391E-7, 1.11738753912010371815E-6, - -4.41673835845875056359E-6, 1.64484480707288970893E-5, - -5.75419501008210370398E-5, 1.88502885095841655729E-4, - -5.76375574538582365885E-4, 1.63947561694133579842E-3, - -4.32430999505057594430E-3, 1.05464603945949983183E-2, - -2.37374148058994688156E-2, 4.93052842396707084878E-2, - -9.49010970480476444210E-2, 1.71620901522208775349E-1, - -3.04682672343198398683E-1, 6.76795274409476084995E-1}; - const double B[] = { - -7.23318048787475395456E-18, -4.83050448594418207126E-18, - 4.46562142029675999901E-17, 3.46122286769746109310E-17, - -2.82762398051658348494E-16, -3.42548561967721913462E-16, - 1.77256013305652638360E-15, 3.81168066935262242075E-15, - -9.55484669882830764870E-15, -4.15056934728722208663E-14, - 1.54008621752140982691E-14, 3.85277838274214270114E-13, - 7.18012445138366623367E-13, -1.79417853150680611778E-12, - -1.32158118404477131188E-11, -3.14991652796324136454E-11, - 1.18891471078464383424E-11, 4.94060238822496958910E-10, - 3.39623202570838634515E-9, 2.26666899049817806459E-8, - 2.04891858946906374183E-7, 2.89137052083475648297E-6, - 6.88975834691682398426E-5, 3.36911647825569408990E-3, - 8.04490411014108831608E-1}; - T y = pabs(x); - T y_le_eight = internal::pchebevl::run( - pmadd(pset1(0.5), y, pset1(-2.0)), A); - T y_gt_eight = pdiv( - internal::pchebevl::run( - psub(pdiv(pset1(32.0), y), pset1(2.0)), B), - psqrt(y)); - // TODO: Perhaps instead check whether all packet elements are in - // [-8, 8] and evaluate a branch based off of that. 
It's possible - // in practice most elements are in this region. - return pselect(pcmp_le(y, pset1(8.0)), y_le_eight, y_gt_eight); - } -}; - -template -struct i0e_impl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { - return generic_i0e::run(x); - } -}; - - -template -struct i1e_retval { - typedef Scalar type; -}; - -template -struct generic_i1e { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE T run(const T&) { - EIGEN_STATIC_ASSERT((internal::is_same::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return ScalarType(0); - } -}; - -template -struct generic_i1e { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE T run(const T& x) { - /* i1ef.c - * - * Modified Bessel function of order one, - * exponentially scaled - * - * - * - * SYNOPSIS: - * - * float x, y, i1ef(); - * - * y = i1ef( x ); - * - * - * - * DESCRIPTION: - * - * Returns exponentially scaled modified Bessel function - * of order one of the argument. - * - * The function is defined as i1(x) = -i exp(-|x|) j1( ix ). - * - * - * - * ACCURACY: - * - * Relative error: - * arithmetic domain # trials peak rms - * IEEE 0, 30 30000 1.5e-6 1.5e-7 - * See i1(). 
- * - */ - const float A[] = {9.38153738649577178388E-9f, -4.44505912879632808065E-8f, - 2.00329475355213526229E-7f, -8.56872026469545474066E-7f, - 3.47025130813767847674E-6f, -1.32731636560394358279E-5f, - 4.78156510755005422638E-5f, -1.61760815825896745588E-4f, - 5.12285956168575772895E-4f, -1.51357245063125314899E-3f, - 4.15642294431288815669E-3f, -1.05640848946261981558E-2f, - 2.47264490306265168283E-2f, -5.29459812080949914269E-2f, - 1.02643658689847095384E-1f, -1.76416518357834055153E-1f, - 2.52587186443633654823E-1f}; - - const float B[] = {-3.83538038596423702205E-9f, -2.63146884688951950684E-8f, - -2.51223623787020892529E-7f, -3.88256480887769039346E-6f, - -1.10588938762623716291E-4f, -9.76109749136146840777E-3f, - 7.78576235018280120474E-1f}; - - - T y = pabs(x); - T y_le_eight = pmul(y, internal::pchebevl::run( - pmadd(pset1(0.5f), y, pset1(-2.0f)), A)); - T y_gt_eight = pdiv( - internal::pchebevl::run( - psub(pdiv(pset1(32.0f), y), - pset1(2.0f)), B), - psqrt(y)); - // TODO: Perhaps instead check whether all packet elements are in - // [-8, 8] and evaluate a branch based off of that. It's possible - // in practice most elements are in this region. - y = pselect(pcmp_le(y, pset1(8.0f)), y_le_eight, y_gt_eight); - return pselect(pcmp_lt(x, pset1(0.0f)), -y, y); - } -}; - -template -struct generic_i1e { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE T run(const T& x) { - /* i1e.c - * - * Modified Bessel function of order one, - * exponentially scaled - * - * - * - * SYNOPSIS: - * - * double x, y, i1e(); - * - * y = i1e( x ); - * - * - * - * DESCRIPTION: - * - * Returns exponentially scaled modified Bessel function - * of order one of the argument. - * - * The function is defined as i1(x) = -i exp(-|x|) j1( ix ). - * - * - * - * ACCURACY: - * - * Relative error: - * arithmetic domain # trials peak rms - * IEEE 0, 30 30000 2.0e-15 2.0e-16 - * See i1(). 
- * - */ - const double A[] = {2.77791411276104639959E-18, -2.11142121435816608115E-17, - 1.55363195773620046921E-16, -1.10559694773538630805E-15, - 7.60068429473540693410E-15, -5.04218550472791168711E-14, - 3.22379336594557470981E-13, -1.98397439776494371520E-12, - 1.17361862988909016308E-11, -6.66348972350202774223E-11, - 3.62559028155211703701E-10, -1.88724975172282928790E-9, - 9.38153738649577178388E-9, -4.44505912879632808065E-8, - 2.00329475355213526229E-7, -8.56872026469545474066E-7, - 3.47025130813767847674E-6, -1.32731636560394358279E-5, - 4.78156510755005422638E-5, -1.61760815825896745588E-4, - 5.12285956168575772895E-4, -1.51357245063125314899E-3, - 4.15642294431288815669E-3, -1.05640848946261981558E-2, - 2.47264490306265168283E-2, -5.29459812080949914269E-2, - 1.02643658689847095384E-1, -1.76416518357834055153E-1, - 2.52587186443633654823E-1}; - const double B[] = { - 7.51729631084210481353E-18, 4.41434832307170791151E-18, - -4.65030536848935832153E-17, -3.20952592199342395980E-17, - 2.96262899764595013876E-16, 3.30820231092092828324E-16, - -1.88035477551078244854E-15, -3.81440307243700780478E-15, - 1.04202769841288027642E-14, 4.27244001671195135429E-14, - -2.10154184277266431302E-14, -4.08355111109219731823E-13, - -7.19855177624590851209E-13, 2.03562854414708950722E-12, - 1.41258074366137813316E-11, 3.25260358301548823856E-11, - -1.89749581235054123450E-11, -5.58974346219658380687E-10, - -3.83538038596423702205E-9, -2.63146884688951950684E-8, - -2.51223623787020892529E-7, -3.88256480887769039346E-6, - -1.10588938762623716291E-4, -9.76109749136146840777E-3, - 7.78576235018280120474E-1}; - T y = pabs(x); - T y_le_eight = pmul(y, internal::pchebevl::run( - pmadd(pset1(0.5), y, pset1(-2.0)), A)); - T y_gt_eight = pdiv( - internal::pchebevl::run( - psub(pdiv(pset1(32.0), y), - pset1(2.0)), B), - psqrt(y)); - // TODO: Perhaps instead check whether all packet elements are in - // [-8, 8] and evaluate a branch based off of that. 
It's possible - // in practice most elements are in this region. - y = pselect(pcmp_le(y, pset1(8.0)), y_le_eight, y_gt_eight); - return pselect(pcmp_lt(x, pset1(0.0f)), -y, y); - } -}; - -template -struct i1e_impl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { - return generic_i1e::run(x); - } -}; - } // end namespace internal namespace numext { @@ -2285,21 +1940,7 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar) return EIGEN_MATHFUNC_IMPL(betainc, Scalar)::run(a, b, x); } -template -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i0e, Scalar) - i0e(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(i0e, Scalar)::run(x); -} - -template -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i1e, Scalar) - i1e(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(i1e, Scalar)::run(x); -} - } // end namespace numext - - } // end namespace Eigen #endif // EIGEN_SPECIAL_FUNCTIONS_H diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h index 21908e512..577015690 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h @@ -72,24 +72,6 @@ Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; retur template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext::betainc; return betainc(a, b, x); } -/** \internal \returns the exponentially scaled modified Bessel function of - * order zero i0e(\a a) (coeff-wise) */ -template -EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet pi0e(const Packet& x) { - typedef typename unpacket_traits::type ScalarType; - using internal::generic_i0e; return generic_i0e::run(x); -} - -/** \internal \returns the exponentially scaled modified Bessel function of - * order one i1e(\a a) (coeff-wise) */ -template 
-EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet pi1e(const Packet& x) { - typedef typename unpacket_traits::type ScalarType; - using internal::generic_i1e; return generic_i1e::run(x); -} - } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h index c831edc17..b886e278c 100644 --- a/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h +++ b/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h @@ -217,6 +217,19 @@ pi0e(const double2& x) { return make_double2(i0e(x.x), i0e(x.y)); } +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi0(const float4& x) { + using numext::i0; + return make_float4(i0(x.x), i0(x.y), i0(x.z), i0(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pi0(const double2& x) { + using numext::i0; + return make_double2(i0(x.x), i0(x.y)); +} + template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi1e(const float4& x) { using numext::i1e; @@ -230,6 +243,123 @@ pi1e(const double2& x) { return make_double2(i1e(x.x), i1e(x.y)); } +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi1(const float4& x) { + using numext::i1; + return make_float4(i1(x.x), i1(x.y), i1(x.z), i1(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pi1(const double2& x) { + using numext::i1; + return make_double2(i1(x.x), i1(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pk0e(const float4& x) { + using numext::k0e; + return make_float4(k0e(x.x), k0e(x.y), k0e(x.z), k0e(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pk0e(const double2& x) { + using numext::k0e; + return make_double2(k0e(x.x), k0e(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pk0(const float4& x) { + using numext::k0; + return make_float4(k0(x.x), k0(x.y), 
k0(x.z), k0(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pk0(const double2& x) { + using numext::k0; + return make_double2(k0(x.x), k0(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pk1e(const float4& x) { + using numext::k1e; + return make_float4(k1e(x.x), k1e(x.y), k1e(x.z), k1e(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pk1e(const double2& x) { + using numext::k1e; + return make_double2(k1e(x.x), k1e(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pk1(const float4& x) { + using numext::k1; + return make_float4(k1(x.x), k1(x.y), k1(x.z), k1(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pk1(const double2& x) { + using numext::k1; + return make_double2(k1(x.x), k1(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pj0(const float4& x) { + using numext::j0; + return make_float4(j0(x.x), j0(x.y), j0(x.z), j0(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pj0(const double2& x) { + using numext::j0; + return make_double2(j0(x.x), j0(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pj1(const float4& x) { + using numext::j1; + return make_float4(j1(x.x), j1(x.y), j1(x.z), j1(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pj1(const double2& x) { + using numext::j1; + return make_double2(j1(x.x), j1(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 py0(const float4& x) { + using numext::y0; + return make_float4(y0(x.x), y0(x.y), y0(x.z), y0(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +py0(const double2& x) { + using numext::y0; + return make_double2(y0(x.x), y0(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 py1(const float4& x) { + using numext::y1; + return make_float4(y1(x.x), y1(x.y), y1(x.z), y1(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 
+py1(const double2& x) { + using numext::y1; + return make_double2(y1(x.x), y1(x.y)); +} + #endif } // end namespace internal diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index f1f109ecb..ac958bfe9 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -106,6 +106,7 @@ ei_add_test(dgmres) ei_add_test(minres) ei_add_test(levenberg_marquardt) ei_add_test(kronecker_product) +ei_add_test(bessel_functions) ei_add_test(special_functions) # TODO: The following test names are prefixed with the cxx11 string, since historically diff --git a/unsupported/test/bessel_functions.cpp b/unsupported/test/bessel_functions.cpp new file mode 100644 index 000000000..2b6bb6b2c --- /dev/null +++ b/unsupported/test/bessel_functions.cpp @@ -0,0 +1,370 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include "../Eigen/SpecialFunctions" + +template +void verify_component_wise(const X& x, const Y& y) +{ + for(Index i=0; i void array_bessel_functions() +{ + // Test Bessel function i0. Reference results obtained with SciPy. 
+ { + ArrayType x(21); + ArrayType expected(21); + ArrayType res(21); + + x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0, + 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected << 4.35582826e+07, 6.21841242e+06, 8.93446228e+05, 1.29418563e+05, + 1.89489253e+04, 2.81571663e+03, 4.27564116e+02, 6.72344070e+01, + 1.13019220e+01, 2.27958530e+00, 1.00000000e+00, 2.27958530e+00, + 1.13019220e+01, 6.72344070e+01, 4.27564116e+02, 2.81571663e+03, + 1.89489253e+04, 1.29418563e+05, 8.93446228e+05, 6.21841242e+06, + 4.35582826e+07; + + CALL_SUBTEST(res = i0(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function i0e. Reference results obtained with SciPy. + { + ArrayType x(21); + ArrayType expected(21); + ArrayType res(21); + + x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0, + 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected << 0.0897803118848, 0.0947062952128, 0.100544127361, + 0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857, + 0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554, + 0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163, + 0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128, + 0.0897803118848; + + CALL_SUBTEST(res = i0e(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function i1. Reference results obtained with SciPy. 
+ { + ArrayType x(21); + ArrayType expected(21); + ArrayType res(21); + + x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0, + 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected << -4.24549734e+07, -6.04313324e+06, -8.65059436e+05, -1.24707259e+05, + -1.81413488e+04, -2.67098830e+03, -3.99873137e+02, -6.13419368e+01, + -9.75946515e+00, -1.59063685e+00, 0.00000000e+00, 1.59063685e+00, + 9.75946515e+00, 6.13419368e+01, 3.99873137e+02, 2.67098830e+03, + 1.81413488e+04, 1.24707259e+05, 8.65059436e+05, 6.04313324e+06, + 4.24549734e+07; + + CALL_SUBTEST(res = i1(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function i1e. Reference results obtained with SciPy. + { + ArrayType x(21); + ArrayType expected(21); + ArrayType res(21); + + x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0, + 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected << -0.0875062221833, -0.092036796872, -0.0973496147565, + -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293, + -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249, + 0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384, + 0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872, + 0.0875062221833; + + CALL_SUBTEST(res = i1e(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function j0. Reference results obtained with SciPy. 
+ { + ArrayType x(77); + ArrayType expected(77); + ArrayType res(77); + + x << -38., -37., -36., -35., -34., -33., -32., -31., -30., + -29., -28., -27., -26., -25., -24., -23., -22., -21., -20., -19., + -18., -17., -16., -15., -14., -13., -12., -11., -10., -9., -8., + -7., -6., -5., -4., -3., -2., -1., 0., 1., 2., 3., + 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., + 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., + 37., 38.; + + expected << 0.11433274, 0.01086237, -0.10556738, + -0.12684568, -0.03042119, 0.09727067, 0.13807901, 0.05120815, + -0.08636798, -0.14784876, -0.07315701, 0.07274192, 0.15599932, + 0.09626678, -0.05623027, -0.16241278, -0.12065148, 0.03657907, + 0.16702466, 0.14662944, -0.01335581, -0.16985425, -0.17489907, + -0.01422447, 0.17107348, 0.2069261 , 0.04768931, -0.1711903 , + -0.24593576, -0.09033361, 0.17165081, 0.30007927, 0.15064526, + -0.17759677, -0.39714981, -0.26005195, 0.22389078, 0.76519769, + 1. , 0.76519769, 0.22389078, -0.26005195, -0.39714981, + -0.17759677, 0.15064526, 0.30007927, 0.17165081, -0.09033361, + -0.24593576, -0.1711903 , 0.04768931, 0.2069261 , 0.17107348, + -0.01422447, -0.17489907, -0.16985425, -0.01335581, 0.14662944, + 0.16702466, 0.03657907, -0.12065148, -0.16241278, -0.05623027, + 0.09626678, 0.15599932, 0.07274192, -0.07315701, -0.14784876, + -0.08636798, 0.05120815, 0.13807901, 0.09727067, -0.03042119, + -0.12684568, -0.10556738, 0.01086237, 0.11433274; + + CALL_SUBTEST(res = j0(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function j1. Reference results obtained with SciPy. 
+ { + ArrayType x(81); + ArrayType expected(81); + ArrayType res(81); + + x << -40., -39., -38., -37., -36., -35., -34., -33., -32., -31., -30., + -29., -28., -27., -26., -25., -24., -23., -22., -21., -20., -19., + -18., -17., -16., -15., -14., -13., -12., -11., -10., -9., -8., + -7., -6., -5., -4., -3., -2., -1., 0., 1., 2., 3., + 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., + 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., + 37., 38., 39., 40.; + + expected << -0.12603832, -0.0640561 , 0.05916189, 0.13058004, 0.08232981, + -0.04399094, -0.13297118, -0.10061965, 0.02658903, 0.13302432, + 0.11875106, -0.0069342 , -0.13055149, -0.13658472, -0.01504573, + 0.12535025, 0.15403807, 0.03951932, -0.11717779, -0.17112027, + -0.06683312, 0.10570143, 0.18799489, 0.09766849, -0.09039718, + -0.20510404, -0.13337515, 0.07031805, 0.2234471 , 0.1767853 , + -0.04347275, -0.24531179, -0.23463635, 0.00468282, 0.27668386, + 0.32757914, 0.06604333, -0.33905896, -0.57672481, -0.44005059, + 0. , 0.44005059, 0.57672481, 0.33905896, -0.06604333, + -0.32757914, -0.27668386, -0.00468282, 0.23463635, 0.24531179, + 0.04347275, -0.1767853 , -0.2234471 , -0.07031805, 0.13337515, + 0.20510404, 0.09039718, -0.09766849, -0.18799489, -0.10570143, + 0.06683312, 0.17112027, 0.11717779, -0.03951932, -0.15403807, + -0.12535025, 0.01504573, 0.13658472, 0.13055149, 0.0069342 , + -0.11875106, -0.13302432, -0.02658903, 0.10061965, 0.13297118, + 0.04399094, -0.08232981, -0.13058004, -0.05916189, 0.0640561 , + 0.12603832; + + CALL_SUBTEST(res = j1(x); + verify_component_wise(res, expected);); + } + // Test Bessel function k0e. Reference results obtained with SciPy. 
+ { + ArrayType x(42); + ArrayType expected(42); + ArrayType res(42); + + x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., + 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., + 39., 40.; + + expected << 1.97933385, 1.52410939, 1.14446308, 0.84156822, + 0.6977616 , 0.60929767, 0.54780756, 0.50186313, 0.4658451 , + 0.43662302, 0.41229555, 0.39163193, 0.3737955 , 0.35819488, + 0.34439865, 0.33208364, 0.32100235, 0.31096159, 0.30180802, + 0.29341821, 0.28569149, 0.27854488, 0.2719092 , 0.26572635, + 0.25994703, 0.25452917, 0.2494366 , 0.24463801, 0.24010616, + 0.23581722, 0.23175022, 0.22788667, 0.22421014, 0.22070602, + 0.21736123, 0.21416406, 0.21110397, 0.20817141, 0.20535778, + 0.20265524, 0.20005668, 0.19755558; + + CALL_SUBTEST(res = k0e(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function k0. Reference results obtained with SciPy. + { + ArrayType x(42); + ArrayType expected(42); + ArrayType res(42); + + x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., + 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., + 39., 40.; + + expected << 1.54150675, 0.92441907, 4.21024438e-01, 1.13893873e-01, + 3.47395044e-02, 1.11596761e-02, 3.69109833e-03, 1.24399433e-03, + 4.24795742e-04, 1.46470705e-04, 5.08813130e-05, 1.77800623e-05, + 6.24302055e-06, 2.20082540e-06, 7.78454386e-07, 2.76137082e-07, + 9.81953648e-08, 3.49941166e-08, 1.24946640e-08, 4.46875334e-09, + 1.60067129e-09, 5.74123782e-10, 2.06176797e-10, 7.41235161e-11, + 2.66754511e-11, 9.60881878e-12, 3.46416156e-12, 1.24987740e-12, + 4.51286453e-13, 1.63053459e-13, 5.89495073e-14, 2.13247750e-14, + 7.71838266e-15, 2.79505752e-15, 1.01266123e-15, 3.67057597e-16, + 1.33103515e-16, 4.82858338e-17, 1.75232770e-17, 6.36161716e-18, + 2.31029936e-18, 8.39286110e-19; + + CALL_SUBTEST(res = k0(x); + 
verify_component_wise(res, expected);); + } + + // Test Bessel function k0e. Reference results obtained with SciPy. + { + ArrayType x(42); + ArrayType expected(42); + ArrayType res(42); + + x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., + 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., + 39., 40.; + + expected << 1.97933385, 1.52410939, 1.14446308, 0.84156822, + 0.6977616 , 0.60929767, 0.54780756, 0.50186313, + 0.4658451 , 0.43662302, 0.41229555, 0.39163193, + 0.3737955 , 0.35819488, 0.34439865, 0.33208364, + 0.32100235, 0.31096159, 0.30180802, 0.29341821, + 0.28569149, 0.27854488, 0.2719092 , 0.26572635, + 0.25994703, 0.25452917, 0.2494366 , 0.24463801, + 0.24010616, 0.23581722, 0.23175022, 0.22788667, + 0.22421014, 0.22070602, 0.21736123, 0.21416406, + 0.21110397, 0.20817141, 0.20535778, 0.20265524, + 0.20005668, 0.19755558; + + CALL_SUBTEST(res = k0e(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function k1. Reference results obtained with SciPy. 
+ { + ArrayType x(42); + ArrayType expected(42); + ArrayType res(42); + + x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., + 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., + 39., 40.; + + expected << 3.74702597, 1.65644112, 6.01907230e-01, 1.39865882e-01, + 4.01564311e-02, 1.24834989e-02, 4.04461345e-03, 1.34391972e-03, + 4.54182487e-04, 1.55369212e-04, 5.36370164e-05, 1.86487735e-05, + 6.52086067e-06, 2.29075746e-06, 8.07858841e-07, 2.85834365e-07, + 1.01417294e-07, 3.60715712e-08, 1.28570417e-08, 4.59124963e-09, + 1.64226697e-09, 5.88305797e-10, 2.11029922e-10, 7.57898116e-11, + 2.72493059e-11, 9.80699893e-12, 3.53277807e-12, 1.27369078e-12, + 4.59568940e-13, 1.65940011e-13, 5.99574032e-14, 2.16773200e-14, + 7.84189960e-15, 2.83839927e-15, 1.02789171e-15, 3.72416929e-16, + 1.34991783e-16, 4.89519373e-17, 1.77585196e-17, 6.44478588e-18, + 2.33973340e-18, 8.49713195e-19; + + CALL_SUBTEST(res = k1(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function k1e. Reference results obtained with SciPy. 
+ { + ArrayType x(42); + ArrayType expected(42); + ArrayType res(42); + + x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., + 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., + 39., 40.; + + expected << 4.81127659, 2.73100971, 1.63615349, 1.03347685, + 0.80656348, 0.68157595, 0.60027386, 0.54217591, + 0.49807158, 0.46314909, 0.43462525, 0.41076657, + 0.39043094, 0.37283175, 0.35740757, 0.34374563, + 0.33153489, 0.32053597, 0.31056123, 0.30146131, + 0.29311559, 0.2854255 , 0.27830958, 0.27169987, + 0.26553913, 0.25977879, 0.25437733, 0.249299 , + 0.24451285, 0.23999191, 0.2357126 , 0.23165413, + 0.22779816, 0.22412841, 0.22063036, 0.21729103, + 0.21409878, 0.21104314, 0.20811462, 0.20530466, + 0.20260547, 0.20000997; + + CALL_SUBTEST(res = k1e(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function y0. Reference results obtained with SciPy. + { + ArrayType x(42); + ArrayType expected(42); + ArrayType res(42); + + x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., + 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., + 39., 40.; + + expected << -0.93157302, -0.44451873, 0.08825696, 0.51037567, 0.37685001, + -0.01694074, -0.30851763, -0.28819468, -0.02594974, 0.22352149, + 0.2499367 , 0.05567117, -0.16884732, -0.22523731, -0.07820786, + 0.12719257, 0.2054643 , 0.095811 , -0.0926372 , -0.18755216, + -0.10951969, 0.0626406 , 0.17020176, 0.1198876 , -0.03598179, + -0.15283403, -0.12724943, 0.01204463, 0.13521498, 0.13183647, + 0.00948116, -0.11729573, -0.13383266, -0.02874248, 0.09913483, + 0.13340405, 0.04579799, -0.08085609, -0.13071488, -0.06066076, + 0.06262353, 0.12593642; + + CALL_SUBTEST(res = y0(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function y1. Reference results obtained with SciPy. 
+ { + ArrayType x(42); + ArrayType expected(42); + ArrayType res(42); + + x << 0.25, 0.5, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., + 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., + 39., 40.; + + expected << -2.70410523, -1.47147239, -0.78121282, -0.10703243, + 0.32467442, 0.39792571, 0.14786314, -0.17501034, -0.30266724, + -0.15806046, 0.10431458, 0.24901542, 0.16370554, -0.05709922, + -0.21008141, -0.16664484, 0.02107363, 0.17797517, 0.16720504, + 0.00815513, -0.14956011, -0.16551161, -0.03253926, 0.12340586, + 0.1616692 , 0.05305978, -0.09882996, -0.15579655, -0.07025124, + 0.07552213, 0.14803412, 0.08442557, -0.05337283, -0.13854483, + -0.09578012, 0.03238588, 0.12751273, 0.10445477, -0.01262946, + -0.11514066, -0.11056411, -0.00579351; + + CALL_SUBTEST(res = y1(x); + verify_component_wise(res, expected);); + } +} + +EIGEN_DECLARE_TEST(bessel_functions) +{ + CALL_SUBTEST_1(array_bessel_functions()); + CALL_SUBTEST_2(array_bessel_functions()); +} diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp index 140a5e4c1..c104ac3c5 100644 --- a/unsupported/test/special_functions.cpp +++ b/unsupported/test/special_functions.cpp @@ -357,47 +357,7 @@ template void array_special_functions() } #endif // EIGEN_HAS_C99_MATH - // Test Bessel function i0e. Reference results obtained with SciPy. 
- { - ArrayType x(21); - ArrayType expected(21); - ArrayType res(21); - - x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0, - 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; - - expected << 0.0897803118848, 0.0947062952128, 0.100544127361, - 0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857, - 0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554, - 0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163, - 0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128, - 0.0897803118848; - - CALL_SUBTEST(res = i0e(x); - verify_component_wise(res, expected);); - } - - // Test Bessel function i1e. Reference results obtained with SciPy. - { - ArrayType x(21); - ArrayType expected(21); - ArrayType res(21); - - x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0, - 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; - - expected << -0.0875062221833, -0.092036796872, -0.0973496147565, - -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293, - -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249, - 0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384, - 0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872, - 0.0875062221833; - - CALL_SUBTEST(res = i1e(x); - verify_component_wise(res, expected);); - } - - /* Code to generate the data for the following two test cases. + /* Code to generate the data for the following two test cases. N = 5 np.random.seed(3)