mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-02-05 17:50:26 +08:00
Merged in mfigurnov/eigen (pull request PR-400)
Exponentially scaled modified Bessel functions of order zero and one. Approved-by: Benoit Steiner <benoit.steiner.goog@gmail.com>
This commit is contained in:
commit
e206f8d4a4
@ -82,6 +82,8 @@ struct default_packet_traits
|
||||
HasPolygamma = 0,
|
||||
HasErf = 0,
|
||||
HasErfc = 0,
|
||||
HasI0e = 0,
|
||||
HasI1e = 0,
|
||||
HasIGamma = 0,
|
||||
HasIGammac = 0,
|
||||
HasBetaInc = 0,
|
||||
|
@ -44,6 +44,8 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasPolygamma = 1,
|
||||
HasErf = 1,
|
||||
HasErfc = 1,
|
||||
HasI0e = 1,
|
||||
HasI1e = 1,
|
||||
HasIGamma = 1,
|
||||
HasIGammac = 1,
|
||||
HasBetaInc = 1,
|
||||
@ -73,6 +75,8 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasPolygamma = 1,
|
||||
HasErf = 1,
|
||||
HasErfc = 1,
|
||||
HasI0e = 1,
|
||||
HasI1e = 1,
|
||||
HasIGamma = 1,
|
||||
HasIGammac = 1,
|
||||
HasBetaInc = 1,
|
||||
|
@ -133,6 +133,18 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
return unaryExpr(internal::scalar_digamma_op<Scalar>());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_i0e_op<Scalar>, const Derived>
|
||||
i0e() const {
|
||||
return unaryExpr(internal::scalar_i0e_op<Scalar>());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_i1e_op<Scalar>, const Derived>
|
||||
i1e() const {
|
||||
return unaryExpr(internal::scalar_i1e_op<Scalar>());
|
||||
}
|
||||
|
||||
// igamma(a = this, x = other)
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorCwiseBinaryOp<internal::scalar_igamma_op<Scalar>, const Derived, const OtherDerived>
|
||||
|
@ -34,6 +34,8 @@ namespace Eigen {
|
||||
* - polygamma
|
||||
* - zeta
|
||||
* - betainc
|
||||
* - i0e
|
||||
* - i1e
|
||||
*
|
||||
* \code
|
||||
* #include <unsupported/Eigen/SpecialFunctions>
|
||||
|
@ -119,6 +119,52 @@ zeta(const Eigen::ArrayBase<DerivedX>& x, const Eigen::ArrayBase<DerivedQ>& q)
|
||||
);
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise i0e(\a x) to the given
|
||||
* arrays.
|
||||
*
|
||||
* It returns the exponentially scaled modified Bessel
|
||||
* function of order zero.
|
||||
*
|
||||
* \param x is the argument
|
||||
*
|
||||
* \note This function supports only float and double scalar types. To support
|
||||
* other scalar types, the user has to provide implementations of i0e(T) for
|
||||
* any scalar type T to be supported.
|
||||
*
|
||||
* \sa ArrayBase::i0e()
|
||||
*/
|
||||
template <typename Derived>
|
||||
inline const Eigen::CwiseUnaryOp<
|
||||
Eigen::internal::scalar_i0e_op<typename Derived::Scalar>, const Derived>
|
||||
i0e(const Eigen::ArrayBase<Derived>& x) {
|
||||
return Eigen::CwiseUnaryOp<
|
||||
Eigen::internal::scalar_i0e_op<typename Derived::Scalar>,
|
||||
const Derived>(x.derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise i1e(\a x) to the given
|
||||
* arrays.
|
||||
*
|
||||
* It returns the exponentially scaled modified Bessel
|
||||
* function of order one.
|
||||
*
|
||||
* \param x is the argument
|
||||
*
|
||||
* \note This function supports only float and double scalar types. To support
|
||||
* other scalar types, the user has to provide implementations of i1e(T) for
|
||||
* any scalar type T to be supported.
|
||||
*
|
||||
* \sa ArrayBase::i1e()
|
||||
*/
|
||||
template <typename Derived>
|
||||
inline const Eigen::CwiseUnaryOp<
|
||||
Eigen::internal::scalar_i1e_op<typename Derived::Scalar>, const Derived>
|
||||
i1e(const Eigen::ArrayBase<Derived>& x) {
|
||||
return Eigen::CwiseUnaryOp<
|
||||
Eigen::internal::scalar_i1e_op<typename Derived::Scalar>,
|
||||
const Derived>(x.derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H
|
||||
|
@ -229,6 +229,60 @@ struct functor_traits<scalar_erfc_op<Scalar> >
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the exponentially scaled modified Bessel
|
||||
* function of order zero
|
||||
* \sa class CwiseUnaryOp, Cwise::i0e()
|
||||
*/
|
||||
template <typename Scalar>
|
||||
struct scalar_i0e_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_i0e_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const {
|
||||
using numext::i0e;
|
||||
return i0e(x);
|
||||
}
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const {
|
||||
return internal::pi0e(x);
|
||||
}
|
||||
};
|
||||
template <typename Scalar>
|
||||
struct functor_traits<scalar_i0e_op<Scalar> > {
|
||||
enum {
|
||||
// On average, a Chebyshev polynomial of order N=20 is computed.
|
||||
// The cost is N multiplications and 2N additions.
|
||||
Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasI0e
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the exponentially scaled modified Bessel
|
||||
* function of order zero
|
||||
* \sa class CwiseUnaryOp, Cwise::i1e()
|
||||
*/
|
||||
template <typename Scalar>
|
||||
struct scalar_i1e_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_i1e_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const {
|
||||
using numext::i1e;
|
||||
return i1e(x);
|
||||
}
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const {
|
||||
return internal::pi1e(x);
|
||||
}
|
||||
};
|
||||
template <typename Scalar>
|
||||
struct functor_traits<scalar_i1e_op<Scalar> > {
|
||||
enum {
|
||||
// On average, a Chebyshev polynomial of order N=20 is computed.
|
||||
// The cost is N multiplications and 2N additions.
|
||||
Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasI1e
|
||||
};
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -39,6 +39,14 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen
|
||||
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half betainc(const Eigen::half& a, const Eigen::half& b, const Eigen::half& x) {
|
||||
return Eigen::half(Eigen::numext::betainc(static_cast<float>(a), static_cast<float>(b), static_cast<float>(x)));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i0e(const Eigen::half& x) {
|
||||
return Eigen::half(Eigen::numext::i0e(static_cast<float>(x)));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i1e(const Eigen::half& x) {
|
||||
return Eigen::half(Eigen::numext::i1e(static_cast<float>(x)));
|
||||
}
|
||||
#endif
|
||||
|
||||
} // end namespace numext
|
||||
|
@ -95,6 +95,75 @@ struct polevl<Scalar, 0> {
|
||||
}
|
||||
};
|
||||
|
||||
/* chbevl (modified for Eigen)
|
||||
*
|
||||
* Evaluate Chebyshev series
|
||||
*
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* int N;
|
||||
* Scalar x, y, coef[N], chebevl();
|
||||
*
|
||||
* y = chbevl( x, coef, N );
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Evaluates the series
|
||||
*
|
||||
* N-1
|
||||
* - '
|
||||
* y = > coef[i] T (x/2)
|
||||
* - i
|
||||
* i=0
|
||||
*
|
||||
* of Chebyshev polynomials Ti at argument x/2.
|
||||
*
|
||||
* Coefficients are stored in reverse order, i.e. the zero
|
||||
* order term is last in the array. Note N is the number of
|
||||
* coefficients, not the order.
|
||||
*
|
||||
* If coefficients are for the interval a to b, x must
|
||||
* have been transformed to x -> 2(2x - b - a)/(b-a) before
|
||||
* entering the routine. This maps x from (a, b) to (-1, 1),
|
||||
* over which the Chebyshev polynomials are defined.
|
||||
*
|
||||
* If the coefficients are for the inverted interval, in
|
||||
* which (a, b) is mapped to (1/b, 1/a), the transformation
|
||||
* required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity,
|
||||
* this becomes x -> 4a/x - 1.
|
||||
*
|
||||
*
|
||||
*
|
||||
* SPEED:
|
||||
*
|
||||
* Taking advantage of the recurrence properties of the
|
||||
* Chebyshev polynomials, the routine requires one more
|
||||
* addition per loop than evaluating a nested polynomial of
|
||||
* the same degree.
|
||||
*
|
||||
*/
|
||||
template <typename Scalar, int N>
|
||||
struct chebevl {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(Scalar x, const Scalar coef[]) {
|
||||
Scalar b0 = coef[0];
|
||||
Scalar b1 = 0;
|
||||
Scalar b2;
|
||||
|
||||
for (int i = 1; i < N; i++) {
|
||||
b2 = b1;
|
||||
b1 = b0;
|
||||
b0 = x * b1 - b2 + coef[i];
|
||||
}
|
||||
|
||||
return Scalar(0.5) * (b0 - b2);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace cephes
|
||||
|
||||
/****************************************************************************
|
||||
@ -1505,6 +1574,334 @@ struct betainc_impl<double> {
|
||||
}
|
||||
};
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of Bessel function, based on Cephes *
|
||||
****************************************************************************/
|
||||
|
||||
template <typename Scalar>
|
||||
struct i0e_retval {
|
||||
typedef Scalar type;
|
||||
};
|
||||
|
||||
template <typename Scalar>
|
||||
struct i0e_impl {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||
return Scalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct i0e_impl<float> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE float run(float x) {
|
||||
/* i0ef.c
|
||||
*
|
||||
* Modified Bessel function of order zero,
|
||||
* exponentially scaled
|
||||
*
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* float x, y, i0ef();
|
||||
*
|
||||
* y = i0ef( x );
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Returns exponentially scaled modified Bessel function
|
||||
* of order zero of the argument.
|
||||
*
|
||||
* The function is defined as i0e(x) = exp(-|x|) j0( ix ).
|
||||
*
|
||||
*
|
||||
*
|
||||
* ACCURACY:
|
||||
*
|
||||
* Relative error:
|
||||
* arithmetic domain # trials peak rms
|
||||
* IEEE 0,30 100000 3.7e-7 7.0e-8
|
||||
* See i0f().
|
||||
*
|
||||
*/
|
||||
const float A[] = {-1.30002500998624804212E-8f, 6.04699502254191894932E-8f,
|
||||
-2.67079385394061173391E-7f, 1.11738753912010371815E-6f,
|
||||
-4.41673835845875056359E-6f, 1.64484480707288970893E-5f,
|
||||
-5.75419501008210370398E-5f, 1.88502885095841655729E-4f,
|
||||
-5.76375574538582365885E-4f, 1.63947561694133579842E-3f,
|
||||
-4.32430999505057594430E-3f, 1.05464603945949983183E-2f,
|
||||
-2.37374148058994688156E-2f, 4.93052842396707084878E-2f,
|
||||
-9.49010970480476444210E-2f, 1.71620901522208775349E-1f,
|
||||
-3.04682672343198398683E-1f, 6.76795274409476084995E-1f};
|
||||
|
||||
const float B[] = {3.39623202570838634515E-9f, 2.26666899049817806459E-8f,
|
||||
2.04891858946906374183E-7f, 2.89137052083475648297E-6f,
|
||||
6.88975834691682398426E-5f, 3.36911647825569408990E-3f,
|
||||
8.04490411014108831608E-1f};
|
||||
if (x < 0.0f) {
|
||||
x = -x;
|
||||
}
|
||||
|
||||
if (x <= 8.0f) {
|
||||
float y = 0.5f * x - 2.0f;
|
||||
return cephes::chebevl<float, 18>::run(y, A);
|
||||
}
|
||||
|
||||
return cephes::chebevl<float, 7>::run(32.0f / x - 2.0f, B) / numext::sqrt(x);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct i0e_impl<double> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE double run(double x) {
|
||||
/* i0e.c
|
||||
*
|
||||
* Modified Bessel function of order zero,
|
||||
* exponentially scaled
|
||||
*
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* double x, y, i0e();
|
||||
*
|
||||
* y = i0e( x );
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Returns exponentially scaled modified Bessel function
|
||||
* of order zero of the argument.
|
||||
*
|
||||
* The function is defined as i0e(x) = exp(-|x|) j0( ix ).
|
||||
*
|
||||
*
|
||||
*
|
||||
* ACCURACY:
|
||||
*
|
||||
* Relative error:
|
||||
* arithmetic domain # trials peak rms
|
||||
* IEEE 0,30 30000 5.4e-16 1.2e-16
|
||||
* See i0().
|
||||
*
|
||||
*/
|
||||
const double A[] = {-4.41534164647933937950E-18, 3.33079451882223809783E-17,
|
||||
-2.43127984654795469359E-16, 1.71539128555513303061E-15,
|
||||
-1.16853328779934516808E-14, 7.67618549860493561688E-14,
|
||||
-4.85644678311192946090E-13, 2.95505266312963983461E-12,
|
||||
-1.72682629144155570723E-11, 9.67580903537323691224E-11,
|
||||
-5.18979560163526290666E-10, 2.65982372468238665035E-9,
|
||||
-1.30002500998624804212E-8, 6.04699502254191894932E-8,
|
||||
-2.67079385394061173391E-7, 1.11738753912010371815E-6,
|
||||
-4.41673835845875056359E-6, 1.64484480707288970893E-5,
|
||||
-5.75419501008210370398E-5, 1.88502885095841655729E-4,
|
||||
-5.76375574538582365885E-4, 1.63947561694133579842E-3,
|
||||
-4.32430999505057594430E-3, 1.05464603945949983183E-2,
|
||||
-2.37374148058994688156E-2, 4.93052842396707084878E-2,
|
||||
-9.49010970480476444210E-2, 1.71620901522208775349E-1,
|
||||
-3.04682672343198398683E-1, 6.76795274409476084995E-1};
|
||||
const double B[] = {
|
||||
-7.23318048787475395456E-18, -4.83050448594418207126E-18,
|
||||
4.46562142029675999901E-17, 3.46122286769746109310E-17,
|
||||
-2.82762398051658348494E-16, -3.42548561967721913462E-16,
|
||||
1.77256013305652638360E-15, 3.81168066935262242075E-15,
|
||||
-9.55484669882830764870E-15, -4.15056934728722208663E-14,
|
||||
1.54008621752140982691E-14, 3.85277838274214270114E-13,
|
||||
7.18012445138366623367E-13, -1.79417853150680611778E-12,
|
||||
-1.32158118404477131188E-11, -3.14991652796324136454E-11,
|
||||
1.18891471078464383424E-11, 4.94060238822496958910E-10,
|
||||
3.39623202570838634515E-9, 2.26666899049817806459E-8,
|
||||
2.04891858946906374183E-7, 2.89137052083475648297E-6,
|
||||
6.88975834691682398426E-5, 3.36911647825569408990E-3,
|
||||
8.04490411014108831608E-1};
|
||||
|
||||
if (x < 0.0) {
|
||||
x = -x;
|
||||
}
|
||||
|
||||
if (x <= 8.0) {
|
||||
double y = (x / 2.0) - 2.0;
|
||||
return cephes::chebevl<double, 30>::run(y, A);
|
||||
}
|
||||
|
||||
return cephes::chebevl<double, 25>::run(32.0 / x - 2.0, B) /
|
||||
numext::sqrt(x);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Scalar>
|
||||
struct i1e_retval {
|
||||
typedef Scalar type;
|
||||
};
|
||||
|
||||
template <typename Scalar>
|
||||
struct i1e_impl {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||
return Scalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct i1e_impl<float> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE float run(float x) {
|
||||
/* i1ef.c
|
||||
*
|
||||
* Modified Bessel function of order one,
|
||||
* exponentially scaled
|
||||
*
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* float x, y, i1ef();
|
||||
*
|
||||
* y = i1ef( x );
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Returns exponentially scaled modified Bessel function
|
||||
* of order one of the argument.
|
||||
*
|
||||
* The function is defined as i1(x) = -i exp(-|x|) j1( ix ).
|
||||
*
|
||||
*
|
||||
*
|
||||
* ACCURACY:
|
||||
*
|
||||
* Relative error:
|
||||
* arithmetic domain # trials peak rms
|
||||
* IEEE 0, 30 30000 1.5e-6 1.5e-7
|
||||
* See i1().
|
||||
*
|
||||
*/
|
||||
const float A[] = {9.38153738649577178388E-9f, -4.44505912879632808065E-8f,
|
||||
2.00329475355213526229E-7f, -8.56872026469545474066E-7f,
|
||||
3.47025130813767847674E-6f, -1.32731636560394358279E-5f,
|
||||
4.78156510755005422638E-5f, -1.61760815825896745588E-4f,
|
||||
5.12285956168575772895E-4f, -1.51357245063125314899E-3f,
|
||||
4.15642294431288815669E-3f, -1.05640848946261981558E-2f,
|
||||
2.47264490306265168283E-2f, -5.29459812080949914269E-2f,
|
||||
1.02643658689847095384E-1f, -1.76416518357834055153E-1f,
|
||||
2.52587186443633654823E-1f};
|
||||
|
||||
const float B[] = {-3.83538038596423702205E-9f, -2.63146884688951950684E-8f,
|
||||
-2.51223623787020892529E-7f, -3.88256480887769039346E-6f,
|
||||
-1.10588938762623716291E-4f, -9.76109749136146840777E-3f,
|
||||
7.78576235018280120474E-1f};
|
||||
|
||||
float z = numext::abs(x);
|
||||
|
||||
if (z <= 8.0f) {
|
||||
float y = 0.5f * z - 2.0f;
|
||||
z = cephes::chebevl<float, 17>::run(y, A) * z;
|
||||
} else {
|
||||
z = cephes::chebevl<float, 7>::run(32.0f / z - 2.0f, B) / numext::sqrt(z);
|
||||
}
|
||||
|
||||
if (x < 0.0f) {
|
||||
z = -z;
|
||||
}
|
||||
|
||||
return z;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct i1e_impl<double> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE double run(double x) {
|
||||
/* i1e.c
|
||||
*
|
||||
* Modified Bessel function of order one,
|
||||
* exponentially scaled
|
||||
*
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* double x, y, i1e();
|
||||
*
|
||||
* y = i1e( x );
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Returns exponentially scaled modified Bessel function
|
||||
* of order one of the argument.
|
||||
*
|
||||
* The function is defined as i1(x) = -i exp(-|x|) j1( ix ).
|
||||
*
|
||||
*
|
||||
*
|
||||
* ACCURACY:
|
||||
*
|
||||
* Relative error:
|
||||
* arithmetic domain # trials peak rms
|
||||
* IEEE 0, 30 30000 2.0e-15 2.0e-16
|
||||
* See i1().
|
||||
*
|
||||
*/
|
||||
const double A[] = {2.77791411276104639959E-18, -2.11142121435816608115E-17,
|
||||
1.55363195773620046921E-16, -1.10559694773538630805E-15,
|
||||
7.60068429473540693410E-15, -5.04218550472791168711E-14,
|
||||
3.22379336594557470981E-13, -1.98397439776494371520E-12,
|
||||
1.17361862988909016308E-11, -6.66348972350202774223E-11,
|
||||
3.62559028155211703701E-10, -1.88724975172282928790E-9,
|
||||
9.38153738649577178388E-9, -4.44505912879632808065E-8,
|
||||
2.00329475355213526229E-7, -8.56872026469545474066E-7,
|
||||
3.47025130813767847674E-6, -1.32731636560394358279E-5,
|
||||
4.78156510755005422638E-5, -1.61760815825896745588E-4,
|
||||
5.12285956168575772895E-4, -1.51357245063125314899E-3,
|
||||
4.15642294431288815669E-3, -1.05640848946261981558E-2,
|
||||
2.47264490306265168283E-2, -5.29459812080949914269E-2,
|
||||
1.02643658689847095384E-1, -1.76416518357834055153E-1,
|
||||
2.52587186443633654823E-1};
|
||||
const double B[] = {
|
||||
7.51729631084210481353E-18, 4.41434832307170791151E-18,
|
||||
-4.65030536848935832153E-17, -3.20952592199342395980E-17,
|
||||
2.96262899764595013876E-16, 3.30820231092092828324E-16,
|
||||
-1.88035477551078244854E-15, -3.81440307243700780478E-15,
|
||||
1.04202769841288027642E-14, 4.27244001671195135429E-14,
|
||||
-2.10154184277266431302E-14, -4.08355111109219731823E-13,
|
||||
-7.19855177624590851209E-13, 2.03562854414708950722E-12,
|
||||
1.41258074366137813316E-11, 3.25260358301548823856E-11,
|
||||
-1.89749581235054123450E-11, -5.58974346219658380687E-10,
|
||||
-3.83538038596423702205E-9, -2.63146884688951950684E-8,
|
||||
-2.51223623787020892529E-7, -3.88256480887769039346E-6,
|
||||
-1.10588938762623716291E-4, -9.76109749136146840777E-3,
|
||||
7.78576235018280120474E-1};
|
||||
|
||||
double z = numext::abs(x);
|
||||
|
||||
if (z <= 8.0) {
|
||||
double y = (z / 2.0) - 2.0;
|
||||
z = cephes::chebevl<double, 29>::run(y, A) * z;
|
||||
} else {
|
||||
z = cephes::chebevl<double, 25>::run(32.0 / z - 2.0, B) / numext::sqrt(z);
|
||||
}
|
||||
|
||||
if (x < 0.0) {
|
||||
z = -z;
|
||||
}
|
||||
|
||||
return z;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
} // end namespace internal
|
||||
@ -1565,6 +1962,18 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar)
|
||||
return EIGEN_MATHFUNC_IMPL(betainc, Scalar)::run(a, b, x);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i0e, Scalar)
|
||||
i0e(const Scalar& x) {
|
||||
return EIGEN_MATHFUNC_IMPL(i0e, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i1e, Scalar)
|
||||
i1e(const Scalar& x) {
|
||||
return EIGEN_MATHFUNC_IMPL(i1e, Scalar)::run(x);
|
||||
}
|
||||
|
||||
} // end namespace numext
|
||||
|
||||
|
||||
|
@ -50,6 +50,18 @@ Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; retur
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext::betainc; return betainc(a, b, x); }
|
||||
|
||||
/** \internal \returns the exponentially scaled modified Bessel function of
|
||||
* order zero i0e(\a a) (coeff-wise) */
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pi0e(const Packet& x) { using numext::i0e; return i0e(x); }
|
||||
|
||||
/** \internal \returns the exponentially scaled modified Bessel function of
|
||||
* order one i1e(\a a) (coeff-wise) */
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pi1e(const Packet& x) { using numext::i1e; return i1e(x); }
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -156,6 +156,32 @@ double2 pbetainc<double2>(const double2& a, const double2& b, const double2& x)
|
||||
return make_double2(betainc(a.x, b.x, x.x), betainc(a.y, b.y, x.y));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi0e<float4>(const float4& x) {
|
||||
using numext::i0e;
|
||||
return make_float4(i0e(x.x), i0e(x.y), i0e(x.z), i0e(x.w));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
|
||||
pi0e<double2>(const double2& x) {
|
||||
using numext::i0e;
|
||||
return make_double2(i0e(x.x), i0e(x.y));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi1e<float4>(const float4& x) {
|
||||
using numext::i1e;
|
||||
return make_float4(i1e(x.x), i1e(x.y), i1e(x.z), i1e(x.w));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
|
||||
pi1e<double2>(const double2& x) {
|
||||
using numext::i1e;
|
||||
return make_double2(i1e(x.x), i1e(x.y));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
@ -1208,6 +1208,116 @@ void test_cuda_betainc()
|
||||
cudaFree(d_out);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
void test_cuda_i0e()
|
||||
{
|
||||
Tensor<Scalar, 1> in_x(21);
|
||||
Tensor<Scalar, 1> out(21);
|
||||
Tensor<Scalar, 1> expected_out(21);
|
||||
out.setZero();
|
||||
|
||||
Array<Scalar, 1, Dynamic> in_x_array(21);
|
||||
Array<Scalar, 1, Dynamic> expected_out_array(21);
|
||||
|
||||
in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0,
|
||||
-2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
|
||||
|
||||
expected_out_array << 0.0897803118848, 0.0947062952128, 0.100544127361,
|
||||
0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857,
|
||||
0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554,
|
||||
0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163,
|
||||
0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128,
|
||||
0.0897803118848;
|
||||
|
||||
for (int i = 0; i < 21; ++i) {
|
||||
in_x(i) = in_x_array(i);
|
||||
expected_out(i) = expected_out_array(i);
|
||||
}
|
||||
|
||||
std::size_t bytes = in_x.size() * sizeof(Scalar);
|
||||
|
||||
Scalar* d_in;
|
||||
Scalar* d_out;
|
||||
cudaMalloc((void**)(&d_in), bytes);
|
||||
cudaMalloc((void**)(&d_out), bytes);
|
||||
|
||||
cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice);
|
||||
|
||||
Eigen::CudaStreamDevice stream;
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21);
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21);
|
||||
|
||||
gpu_out.device(gpu_device) = gpu_in.i0e();
|
||||
|
||||
assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
|
||||
gpu_device.stream()) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
|
||||
|
||||
for (int i = 0; i < 21; ++i) {
|
||||
VERIFY_IS_APPROX(out(i), expected_out(i));
|
||||
}
|
||||
|
||||
cudaFree(d_in);
|
||||
cudaFree(d_out);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
void test_cuda_i1e()
|
||||
{
|
||||
Tensor<Scalar, 1> in_x(21);
|
||||
Tensor<Scalar, 1> out(21);
|
||||
Tensor<Scalar, 1> expected_out(21);
|
||||
out.setZero();
|
||||
|
||||
Array<Scalar, 1, Dynamic> in_x_array(21);
|
||||
Array<Scalar, 1, Dynamic> expected_out_array(21);
|
||||
|
||||
in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0,
|
||||
-2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
|
||||
|
||||
expected_out_array << -0.0875062221833, -0.092036796872, -0.0973496147565,
|
||||
-0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293,
|
||||
-0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249,
|
||||
0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384,
|
||||
0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872,
|
||||
0.0875062221833;
|
||||
|
||||
for (int i = 0; i < 21; ++i) {
|
||||
in_x(i) = in_x_array(i);
|
||||
expected_out(i) = expected_out_array(i);
|
||||
}
|
||||
|
||||
std::size_t bytes = in_x.size() * sizeof(Scalar);
|
||||
|
||||
Scalar* d_in;
|
||||
Scalar* d_out;
|
||||
cudaMalloc((void**)(&d_in), bytes);
|
||||
cudaMalloc((void**)(&d_out), bytes);
|
||||
|
||||
cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice);
|
||||
|
||||
Eigen::CudaStreamDevice stream;
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21);
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21);
|
||||
|
||||
gpu_out.device(gpu_device) = gpu_in.i1e();
|
||||
|
||||
assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
|
||||
gpu_device.stream()) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
|
||||
|
||||
for (int i = 0; i < 21; ++i) {
|
||||
VERIFY_IS_APPROX(out(i), expected_out(i));
|
||||
}
|
||||
|
||||
cudaFree(d_in);
|
||||
cudaFree(d_out);
|
||||
}
|
||||
|
||||
|
||||
void test_cxx11_tensor_cuda()
|
||||
{
|
||||
@ -1280,5 +1390,11 @@ void test_cxx11_tensor_cuda()
|
||||
|
||||
CALL_SUBTEST_6(test_cuda_betainc<float>());
|
||||
CALL_SUBTEST_6(test_cuda_betainc<double>());
|
||||
|
||||
CALL_SUBTEST_6(test_cuda_i0e<float>());
|
||||
CALL_SUBTEST_6(test_cuda_i0e<double>());
|
||||
|
||||
CALL_SUBTEST_6(test_cuda_i1e<float>());
|
||||
CALL_SUBTEST_6(test_cuda_i1e<double>());
|
||||
#endif
|
||||
}
|
||||
|
@ -335,6 +335,46 @@ template<typename ArrayType> void array_special_functions()
|
||||
ArrayType test = betainc(a, b + one, x) + eps;
|
||||
verify_component_wise(test, expected););
|
||||
}
|
||||
|
||||
// Test Bessel function i0e. Reference results obtained with SciPy.
|
||||
{
|
||||
ArrayType x(21);
|
||||
ArrayType expected(21);
|
||||
ArrayType res(21);
|
||||
|
||||
x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
|
||||
2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
|
||||
|
||||
expected << 0.0897803118848, 0.0947062952128, 0.100544127361,
|
||||
0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857,
|
||||
0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554,
|
||||
0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163,
|
||||
0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128,
|
||||
0.0897803118848;
|
||||
|
||||
CALL_SUBTEST(res = i0e(x);
|
||||
verify_component_wise(res, expected););
|
||||
}
|
||||
|
||||
// Test Bessel function i1e. Reference results obtained with SciPy.
|
||||
{
|
||||
ArrayType x(21);
|
||||
ArrayType expected(21);
|
||||
ArrayType res(21);
|
||||
|
||||
x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
|
||||
2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
|
||||
|
||||
expected << -0.0875062221833, -0.092036796872, -0.0973496147565,
|
||||
-0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293,
|
||||
-0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249,
|
||||
0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384,
|
||||
0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872,
|
||||
0.0875062221833;
|
||||
|
||||
CALL_SUBTEST(res = i1e(x);
|
||||
verify_component_wise(res, expected););
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user