mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-27 07:29:52 +08:00
merge
This commit is contained in:
commit
774dba87c8
@ -300,6 +300,7 @@ using std::ptrdiff_t;
|
||||
|
||||
#include "src/Core/NumTraits.h"
|
||||
#include "src/Core/MathFunctions.h"
|
||||
#include "src/Core/SpecialFunctions.h"
|
||||
#include "src/Core/GenericPacketMath.h"
|
||||
|
||||
#if defined EIGEN_VECTORIZE_AVX
|
||||
|
@ -43,7 +43,7 @@ struct default_packet_traits
|
||||
{
|
||||
enum {
|
||||
HasHalfPacket = 0,
|
||||
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
@ -74,6 +74,9 @@ struct default_packet_traits
|
||||
HasSinh = 0,
|
||||
HasCosh = 0,
|
||||
HasTanh = 0,
|
||||
HasLGamma = 0,
|
||||
HasErf = 0,
|
||||
HasErfc = 0,
|
||||
|
||||
HasRound = 0,
|
||||
HasFloor = 0,
|
||||
@ -432,6 +435,18 @@ Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
|
||||
|
||||
/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }
|
||||
|
||||
/** \internal \returns the erf(\a a) (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet perf(const Packet& a) { using numext::erf; return erf(a); }
|
||||
|
||||
/** \internal \returns the erfc(\a a) (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
|
||||
|
||||
/***************************************************************************
|
||||
* The following functions might not have to be overwritten for vectorized types
|
||||
***************************************************************************/
|
||||
|
@ -49,6 +49,9 @@ namespace Eigen
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op)
|
||||
|
@ -155,6 +155,10 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
checkSanity();
|
||||
}
|
||||
|
||||
#ifdef EIGEN_MAPBASE_PLUGIN
|
||||
#include EIGEN_MAPBASE_PLUGIN
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
|
160
Eigen/src/Core/SpecialFunctions.h
Normal file
160
Eigen/src/Core/SpecialFunctions.h
Normal file
@ -0,0 +1,160 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_SPECIAL_FUNCTIONS_H
|
||||
#define EIGEN_SPECIAL_FUNCTIONS_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of lgamma *
|
||||
****************************************************************************/
|
||||
|
||||
template<typename Scalar>
|
||||
struct lgamma_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar& x)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||
return Scalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal Result type of lgamma for \c Scalar: the scalar type itself. */
template<typename Scalar>
struct lgamma_retval { typedef Scalar type; };
|
||||
|
||||
#ifdef EIGEN_HAS_C99_MATH
|
||||
template<>
|
||||
struct lgamma_impl<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE double run(const float& x) { return ::lgammaf(x); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct lgamma_impl<double>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE double run(const double& x) { return ::lgamma(x); }
|
||||
};
|
||||
#endif
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of erf *
|
||||
****************************************************************************/
|
||||
|
||||
template<typename Scalar>
|
||||
struct erf_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar& x)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||
return Scalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal Result type of erf for \c Scalar: the scalar type itself. */
template<typename Scalar>
struct erf_retval { typedef Scalar type; };
|
||||
|
||||
#ifdef EIGEN_HAS_C99_MATH
|
||||
template<>
|
||||
struct erf_impl<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE float run(const float& x) { return ::erff(x); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct erf_impl<double>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE double run(const double& x) { return ::erf(x); }
|
||||
};
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of erfc *
|
||||
****************************************************************************/
|
||||
|
||||
template<typename Scalar>
|
||||
struct erfc_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar& x)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||
return Scalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal Result type of erfc for \c Scalar: the scalar type itself. */
template<typename Scalar>
struct erfc_retval { typedef Scalar type; };
|
||||
|
||||
#ifdef EIGEN_HAS_C99_MATH
|
||||
template<>
|
||||
struct erfc_impl<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct erfc_impl<double>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
|
||||
};
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
namespace numext {
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) lgamma(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) erfc(const Scalar& x)
|
||||
{
|
||||
return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
|
||||
}
|
||||
|
||||
} // end namespace numext
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SPECIAL_FUNCTIONS_H
|
@ -66,6 +66,43 @@ double2 prsqrt<double2>(const double2& a)
|
||||
return make_double2(rsqrt(a.x), rsqrt(a.y));
|
||||
}
|
||||
|
||||
/** \internal lgamma applied to each of the four lanes of a float4,
  * using the single-precision lgammaf. */
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 plgamma<float4>(const float4& a)
{
  const float lane_x = lgammaf(a.x);
  const float lane_y = lgammaf(a.y);
  const float lane_z = lgammaf(a.z);
  const float lane_w = lgammaf(a.w);
  return make_float4(lane_x, lane_y, lane_z, lane_w);
}
|
||||
|
||||
/** \internal lgamma applied to both lanes of a double2. */
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 plgamma<double2>(const double2& a)
{
  const double lane_x = lgamma(a.x);
  const double lane_y = lgamma(a.y);
  return make_double2(lane_x, lane_y);
}
|
||||
|
||||
/** \internal erf applied to each of the four lanes of a float4.
  *
  * The single-precision erff is named explicitly (in the same way
  * plgamma<float4> names lgammaf) so that no lane can be evaluated through a
  * double-precision overload.
  */
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 perf<float4>(const float4& a)
{
  return make_float4(erff(a.x), erff(a.y), erff(a.z), erff(a.w));
}
|
||||
|
||||
/** \internal erf applied to both lanes of a double2. */
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 perf<double2>(const double2& a)
{
  const double lane_x = erf(a.x);
  const double lane_y = erf(a.y);
  return make_double2(lane_x, lane_y);
}
|
||||
|
||||
/** \internal erfc applied to each of the four lanes of a float4.
  *
  * The single-precision erfcf is named explicitly (in the same way
  * plgamma<float4> names lgammaf) so that no lane can be evaluated through a
  * double-precision overload.
  */
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 perfc<float4>(const float4& a)
{
  return make_float4(erfcf(a.x), erfcf(a.y), erfcf(a.z), erfcf(a.w));
}
|
||||
|
||||
/** \internal erfc applied to both lanes of a double2. */
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 perfc<double2>(const double2& a)
{
  const double lane_x = erfc(a.x);
  const double lane_y = erfc(a.y);
  return make_double2(lane_x, lane_y);
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
@ -39,6 +39,9 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasLGamma = 1,
|
||||
HasErf = 1,
|
||||
HasErfc = 1,
|
||||
|
||||
HasBlend = 0,
|
||||
};
|
||||
@ -59,6 +62,9 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasLGamma = 1,
|
||||
HasErf = 1,
|
||||
HasErfc = 1,
|
||||
|
||||
HasBlend = 0,
|
||||
};
|
||||
|
@ -403,6 +403,77 @@ struct functor_traits<scalar_asin_op<Scalar> >
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the natural log of the absolute
|
||||
* value of Gamma of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::lgamma()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_lgamma_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
|
||||
using numext::lgamma; return lgamma(a);
|
||||
}
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_lgamma_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
// Guesstimate
|
||||
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasLGamma
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the Gauss error function of a
|
||||
* scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::erf()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_erf_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
|
||||
using numext::erf; return erf(a);
|
||||
}
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::perf(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_erf_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
// Guesstimate
|
||||
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasErf
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the Complementary Error Function
|
||||
* of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::erfc()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_erfc_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
|
||||
using numext::erfc; return erfc(a);
|
||||
}
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
inline Packet packetOp(const Packet& a) const { return internal::perfc(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_erfc_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
// Guesstimate
|
||||
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasErfc
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the atan of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::atan()
|
||||
@ -422,6 +493,7 @@ struct functor_traits<scalar_atan_op<Scalar> >
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the tanh of a scalar
|
||||
* \sa class CwiseUnaryOp, ArrayBase::tanh()
|
||||
|
@ -341,6 +341,13 @@
|
||||
#define EIGEN_HAVE_RVALUE_REFERENCES
|
||||
#endif
|
||||
|
||||
// Does the toolchain provide the C99 math functions?
// EIGEN_HAS_C99_MATH is defined to 1 when at least one of these holds:
//  - __STDC_VERSION__ is defined and is >= 199901,
//  - __GNUC__ and _GLIBCXX_USE_C99 are both defined,
//  - _LIBCPP_VERSION is defined and _MSC_VER is not.
|
||||
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \
|
||||
|| (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
|
||||
|| (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))
|
||||
#define EIGEN_HAS_C99_MATH 1
|
||||
#endif
|
||||
|
||||
// Does the compiler support result_of?
|
||||
#if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L))
|
||||
#define EIGEN_HAS_STD_RESULT_OF 1
|
||||
|
@ -96,7 +96,8 @@
|
||||
STORAGE_LAYOUT_DOES_NOT_MATCH,
|
||||
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE,
|
||||
THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS,
|
||||
MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY
|
||||
MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
|
||||
THIS_TYPE_IS_NOT_SUPPORTED
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -666,7 +666,7 @@ void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Index rows, Index cols, u
|
||||
|
||||
if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this);
|
||||
if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this);
|
||||
if(m_cols!=m_cols) m_scaledMatrix.resize(rows,cols);
|
||||
if(m_rows!=m_cols) m_scaledMatrix.resize(rows,cols);
|
||||
}
|
||||
|
||||
template<typename MatrixType, int QRPreconditioner>
|
||||
|
@ -21,6 +21,9 @@ typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturn
|
||||
typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> PowReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType;
|
||||
@ -302,6 +305,47 @@ cosh() const
|
||||
return CoshReturnType(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise ln(|gamma(*this)|).
|
||||
*
|
||||
* Example: \include Cwise_lgamma.cpp
|
||||
* Output: \verbinclude Cwise_lgamma.out
|
||||
*
|
||||
* \sa cos(), sin(), tan()
|
||||
*/
|
||||
inline const LgammaReturnType
|
||||
lgamma() const
|
||||
{
|
||||
return LgammaReturnType(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise Gauss error
|
||||
* function of *this.
|
||||
*
|
||||
* Example: \include Cwise_erf.cpp
|
||||
* Output: \verbinclude Cwise_erf.out
|
||||
*
|
||||
* \sa cos(), sin(), tan()
|
||||
*/
|
||||
inline const ErfReturnType
|
||||
erf() const
|
||||
{
|
||||
return ErfReturnType(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise Complementary error
|
||||
* function of *this.
|
||||
*
|
||||
* Example: \include Cwise_erfc.cpp
|
||||
* Output: \verbinclude Cwise_erfc.out
|
||||
*
|
||||
* \sa cos(), sin(), tan()
|
||||
*/
|
||||
inline const ErfcReturnType
|
||||
erfc() const
|
||||
{
|
||||
return ErfcReturnType(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise power of *this to the given exponent.
|
||||
*
|
||||
* This function computes the coefficient-wise power. The function MatrixBase::pow() in the
|
||||
|
@ -106,6 +106,7 @@ following macros are supported; none of them are defined by default.
|
||||
- \b EIGEN_MATRIX_PLUGIN - filename of plugin for extending the Matrix class.
|
||||
- \b EIGEN_MATRIXBASE_PLUGIN - filename of plugin for extending the MatrixBase class.
|
||||
- \b EIGEN_PLAINOBJECTBASE_PLUGIN - filename of plugin for extending the PlainObjectBase class.
|
||||
- \b EIGEN_MAPBASE_PLUGIN - filename of plugin for extending the MapBase class.
|
||||
- \b EIGEN_QUATERNION_PLUGIN - filename of plugin for extending the Quaternion class.
|
||||
- \b EIGEN_QUATERNIONBASE_PLUGIN - filename of plugin for extending the QuaternionBase class.
|
||||
- \b EIGEN_SPARSEMATRIX_PLUGIN - filename of plugin for extending the SparseMatrix class.
|
||||
|
@ -202,7 +202,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
|
||||
m2 = ArrayType::Random(rows, cols),
|
||||
m3(rows, cols),
|
||||
m4 = m1;
|
||||
|
||||
|
||||
m4 = (m4.abs()==Scalar(0)).select(1,m4);
|
||||
|
||||
Scalar s1 = internal::random<Scalar>();
|
||||
@ -217,6 +217,11 @@ template<typename ArrayType> void array_real(const ArrayType& m)
|
||||
VERIFY_IS_APPROX(m1.sinh(), sinh(m1));
|
||||
VERIFY_IS_APPROX(m1.cosh(), cosh(m1));
|
||||
VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
|
||||
#ifdef EIGEN_HAS_C99_MATH
|
||||
VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1));
|
||||
VERIFY_IS_APPROX(m1.erf(), erf(m1));
|
||||
VERIFY_IS_APPROX(m1.erfc(), erfc(m1));
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
VERIFY_IS_APPROX(m1.arg(), arg(m1));
|
||||
VERIFY_IS_APPROX(m1.round(), round(m1));
|
||||
VERIFY_IS_APPROX(m1.floor(), floor(m1));
|
||||
|
@ -338,7 +338,7 @@ template<typename Scalar> void packetmath_real()
|
||||
data1[1] = 0;
|
||||
h.store(data2, internal::pexp(h.load(data1)));
|
||||
VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::epsilon()), data2[0]);
|
||||
VERIFY_IS_EQUAL(std::exp(0), data2[1]);
|
||||
VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]);
|
||||
|
||||
data1[0] = (std::numeric_limits<Scalar>::min)();
|
||||
data1[1] = -(std::numeric_limits<Scalar>::min)();
|
||||
@ -353,15 +353,43 @@ template<typename Scalar> void packetmath_real()
|
||||
VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::denorm_min()), data2[1]);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_HAS_C99_MATH
|
||||
{
|
||||
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
||||
packet_helper<internal::packet_traits<Scalar>::HasLGamma,Packet> h;
|
||||
h.store(data2, internal::plgamma(h.load(data1)));
|
||||
VERIFY((numext::isnan)(data2[0]));
|
||||
}
|
||||
{
|
||||
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
||||
packet_helper<internal::packet_traits<Scalar>::HasErf,Packet> h;
|
||||
h.store(data2, internal::perf(h.load(data1)));
|
||||
VERIFY((numext::isnan)(data2[0]));
|
||||
}
|
||||
{
|
||||
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
||||
packet_helper<internal::packet_traits<Scalar>::HasErfc,Packet> h;
|
||||
h.store(data2, internal::perfc(h.load(data1)));
|
||||
VERIFY((numext::isnan)(data2[0]));
|
||||
}
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
for (int i=0; i<size; ++i)
|
||||
{
|
||||
data1[i] = internal::random<Scalar>(0,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
|
||||
data2[i] = internal::random<Scalar>(0,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
|
||||
}
|
||||
|
||||
if(internal::random<float>(0,1)<0.1)
|
||||
data1[internal::random<int>(0, PacketSize)] = 0;
|
||||
CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
|
||||
#if defined(EIGEN_HAS_C99_MATH) && (__cplusplus > 199711L)
|
||||
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLGamma, std::lgamma, internal::plgamma);
|
||||
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErf, std::erf, internal::perf);
|
||||
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErfc, std::erfc, internal::perfc);
|
||||
#endif
|
||||
|
||||
if(PacketTraits::HasLog && PacketTraits::size>=2)
|
||||
{
|
||||
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
||||
@ -375,7 +403,7 @@ template<typename Scalar> void packetmath_real()
|
||||
data1[1] = 0;
|
||||
h.store(data2, internal::plog(h.load(data1)));
|
||||
VERIFY((numext::isnan)(data2[0]));
|
||||
VERIFY_IS_EQUAL(std::log(0), data2[1]);
|
||||
VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]);
|
||||
|
||||
data1[0] = (std::numeric_limits<Scalar>::min)();
|
||||
data1[1] = -(std::numeric_limits<Scalar>::min)();
|
||||
|
@ -122,6 +122,24 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
return unaryExpr(internal::scalar_tanh_op<Scalar>());
|
||||
}
|
||||
|
||||
// Returns a coefficient-wise expression applying internal::scalar_lgamma_op
// to this tensor expression.
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived>
lgamma() const {
  typedef internal::scalar_lgamma_op<Scalar> LgammaFunctor;
  return unaryExpr(LgammaFunctor());
}
|
||||
|
||||
// Returns a coefficient-wise expression applying internal::scalar_erf_op
// to this tensor expression.
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived>
erf() const {
  typedef internal::scalar_erf_op<Scalar> ErfFunctor;
  return unaryExpr(ErfFunctor());
}
|
||||
|
||||
// Returns a coefficient-wise expression applying internal::scalar_erfc_op
// to this tensor expression.
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived>
erfc() const {
  typedef internal::scalar_erfc_op<Scalar> ErfcFunctor;
  return unaryExpr(ErfcFunctor());
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived>
|
||||
sigmoid() const {
|
||||
|
@ -507,6 +507,115 @@ static void test_cuda_convolution_3d()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename Scalar>
|
||||
void test_cuda_lgamma(const Scalar stddev)
|
||||
{
|
||||
Tensor<Scalar, 2> in(72,97);
|
||||
in.setRandom();
|
||||
in *= in.constant(stddev);
|
||||
Tensor<Scalar, 2> out(72,97);
|
||||
out.setZero();
|
||||
|
||||
std::size_t bytes = in.size() * sizeof(Scalar);
|
||||
|
||||
Scalar* d_in;
|
||||
Scalar* d_out;
|
||||
cudaMalloc((void**)(&d_in), bytes);
|
||||
cudaMalloc((void**)(&d_out), bytes);
|
||||
|
||||
cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
|
||||
|
||||
Eigen::CudaStreamDevice stream;
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
|
||||
|
||||
gpu_out.device(gpu_device) = gpu_in.lgamma();
|
||||
|
||||
assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
|
||||
|
||||
for (int i = 0; i < 72; ++i) {
|
||||
for (int j = 0; j < 97; ++j) {
|
||||
VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
void test_cuda_erf(const Scalar stddev)
|
||||
{
|
||||
Tensor<Scalar, 2> in(72,97);
|
||||
in.setRandom();
|
||||
in *= in.constant(stddev);
|
||||
Tensor<Scalar, 2> out(72,97);
|
||||
out.setZero();
|
||||
|
||||
std::size_t bytes = in.size() * sizeof(Scalar);
|
||||
|
||||
Scalar* d_in;
|
||||
Scalar* d_out;
|
||||
cudaMalloc((void**)(&d_in), bytes);
|
||||
cudaMalloc((void**)(&d_out), bytes);
|
||||
|
||||
cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
|
||||
|
||||
Eigen::CudaStreamDevice stream;
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
|
||||
|
||||
gpu_out.device(gpu_device) = gpu_in.erf();
|
||||
|
||||
assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
|
||||
|
||||
for (int i = 0; i < 72; ++i) {
|
||||
for (int j = 0; j < 97; ++j) {
|
||||
VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
void test_cuda_erfc(const Scalar stddev)
|
||||
{
|
||||
Tensor<Scalar, 2> in(72,97);
|
||||
in.setRandom();
|
||||
in *= in.constant(stddev);
|
||||
Tensor<Scalar, 2> out(72,97);
|
||||
out.setZero();
|
||||
|
||||
std::size_t bytes = in.size() * sizeof(Scalar);
|
||||
|
||||
Scalar* d_in;
|
||||
Scalar* d_out;
|
||||
cudaMalloc((void**)(&d_in), bytes);
|
||||
cudaMalloc((void**)(&d_out), bytes);
|
||||
|
||||
cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
|
||||
|
||||
Eigen::CudaStreamDevice stream;
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
|
||||
|
||||
gpu_out.device(gpu_device) = gpu_in.erfc();
|
||||
|
||||
assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
|
||||
|
||||
for (int i = 0; i < 72; ++i) {
|
||||
for (int j = 0; j < 97; ++j) {
|
||||
VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void test_cxx11_tensor_cuda()
|
||||
{
|
||||
CALL_SUBTEST(test_cuda_elementwise_small());
|
||||
@ -522,4 +631,34 @@ void test_cxx11_tensor_cuda()
|
||||
CALL_SUBTEST(test_cuda_convolution_2d<RowMajor>());
|
||||
CALL_SUBTEST(test_cuda_convolution_3d<ColMajor>());
|
||||
CALL_SUBTEST(test_cuda_convolution_3d<RowMajor>());
|
||||
CALL_SUBTEST(test_cuda_lgamma<float>(1.0f));
|
||||
CALL_SUBTEST(test_cuda_lgamma<float>(100.0f));
|
||||
CALL_SUBTEST(test_cuda_lgamma<float>(0.01f));
|
||||
CALL_SUBTEST(test_cuda_lgamma<float>(0.001f));
|
||||
CALL_SUBTEST(test_cuda_erf<float>(1.0f));
|
||||
CALL_SUBTEST(test_cuda_erf<float>(100.0f));
|
||||
CALL_SUBTEST(test_cuda_erf<float>(0.01f));
|
||||
CALL_SUBTEST(test_cuda_erf<float>(0.001f));
|
||||
CALL_SUBTEST(test_cuda_erfc<float>(1.0f));
|
||||
// CALL_SUBTEST(test_cuda_erfc<float>(100.0f));
|
||||
CALL_SUBTEST(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs
|
||||
CALL_SUBTEST(test_cuda_erfc<float>(0.01f));
|
||||
CALL_SUBTEST(test_cuda_erfc<float>(0.001f));
|
||||
CALL_SUBTEST(test_cuda_tanh<double>(1.0));
|
||||
CALL_SUBTEST(test_cuda_tanh<double>(100.0));
|
||||
CALL_SUBTEST(test_cuda_tanh<double>(0.01));
|
||||
CALL_SUBTEST(test_cuda_tanh<double>(0.001));
|
||||
CALL_SUBTEST(test_cuda_lgamma<double>(1.0));
|
||||
CALL_SUBTEST(test_cuda_lgamma<double>(100.0));
|
||||
CALL_SUBTEST(test_cuda_lgamma<double>(0.01));
|
||||
CALL_SUBTEST(test_cuda_lgamma<double>(0.001));
|
||||
CALL_SUBTEST(test_cuda_erf<double>(1.0));
|
||||
CALL_SUBTEST(test_cuda_erf<double>(100.0));
|
||||
CALL_SUBTEST(test_cuda_erf<double>(0.01));
|
||||
CALL_SUBTEST(test_cuda_erf<double>(0.001));
|
||||
CALL_SUBTEST(test_cuda_erfc<double>(1.0));
|
||||
// CALL_SUBTEST(test_cuda_erfc<double>(100.0));
|
||||
CALL_SUBTEST(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs
|
||||
CALL_SUBTEST(test_cuda_erfc<double>(0.01));
|
||||
CALL_SUBTEST(test_cuda_erfc<double>(0.001));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user