mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-01 18:26:24 +08:00
add vectorization of sqrt for float
This commit is contained in:
parent
3499f6eccd
commit
49fc1e3e84
@ -102,7 +102,7 @@ namespace Eigen {
|
||||
|
||||
#if defined EIGEN_VECTORIZE_SSE
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/TranscendentalFunctions.h"
|
||||
#include "src/Core/arch/SSE/MathFunctions.h"
|
||||
#elif defined EIGEN_VECTORIZE_ALTIVEC
|
||||
#include "src/Core/arch/AltiVec/PacketMath.h"
|
||||
#endif
|
||||
|
@ -58,10 +58,16 @@ struct ei_functor_traits<ei_scalar_add_op<Scalar> >
|
||||
*/
|
||||
// Functor applying a square root to one coefficient (operator()) or to one packet (packetOp).
template<typename Scalar> struct ei_scalar_sqrt_op EIGEN_EMPTY_STRUCT {
|
||||
// Scalar path: forwards the coefficient to ei_sqrt.
inline const Scalar operator() (const Scalar& a) const { return ei_sqrt(a); }
|
||||
// Packet type that ei_packet_traits associates with Scalar.
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
// Packet path: forwards the packet to ei_psqrt.
// NOTE(review): presumably only reached when the functor traits report PacketAccess — confirm against the evaluator.
inline Packet packetOp(const Packet& a) const { return ei_psqrt(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_sqrt_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
{ enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = ei_packet_traits<Scalar>::HasSqrt
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
*
|
||||
|
@ -46,6 +46,7 @@ struct ei_default_packet_traits
|
||||
HasMax = 1,
|
||||
|
||||
HasDiv = 0,
|
||||
HasSqrt = 0,
|
||||
HasExp = 0,
|
||||
HasLog = 0,
|
||||
HasPow = 0,
|
||||
@ -192,6 +193,9 @@ template<typename Packet> inline Packet ei_pexp(Packet a) { return ei_exp(a); }
|
||||
/** \internal \returns the log of \a a (coeff-wise) */
|
||||
template<typename Packet> inline Packet ei_plog(Packet a) { return ei_log(a); }
|
||||
|
||||
/** \internal \returns the square-root of \a a (coeff-wise) */
|
||||
// Generic version: forwards to the scalar ei_sqrt (not ei_log), in the same way ei_plog forwards to ei_log.
template<typename Packet> inline Packet ei_psqrt(Packet a) { return ei_sqrt(a); }
|
||||
|
||||
/***************************************************************************
|
||||
* The following functions might not have to be overwritten for vectorized types
|
||||
***************************************************************************/
|
||||
|
@ -23,9 +23,9 @@
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
/* The functions of this file come from Julien Pommier's sse math library.
|
||||
* which is itself inspired by Intel Approximate Math library, and based on the
|
||||
* corresponding algorithms of the cephes math library.
|
||||
/* The sin, cos, exp, and log functions of this file come from Julien Pommier's sse
|
||||
* math library, which is itself inspired by Intel Approximate Math library,
|
||||
* and based on the corresponding algorithms of the cephes math library.
|
||||
*/
|
||||
|
||||
/* Copyright (C) 2007 Julien Pommier
|
||||
@ -49,18 +49,16 @@
|
||||
(this is the zlib license)
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H
|
||||
#define EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H
|
||||
#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
|
||||
#define EIGEN_MATH_FUNCTIONS_SSE_H
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
|
||||
/* the smallest non denormalized float number */
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
|
||||
// _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(mant_mask, 0x7f800000);
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
|
||||
// _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_sign_mask, ~0x80000000);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4i(1, 1);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
|
||||
@ -214,7 +212,6 @@ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI
|
||||
_EIGEN_DECLARE_CONST_Packet4f(2pi, 2.*M_PI);
|
||||
|
||||
template<> EIGEN_DONT_INLINE Packet4f ei_psin(Packet4f x)
|
||||
{
|
||||
@ -358,4 +355,12 @@ template<> Packet4f ei_pcos(Packet4f x)
|
||||
return _mm_xor_ps(y, sign_bit);
|
||||
}
|
||||
|
||||
#endif // EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H
|
||||
/** \internal \returns the square-root of \a _x (coeff-wise), specialization for Packet4f.
  *
  * The result is computed as _x * y, where y starts as the _mm_rsqrt_ps estimate of
  * 1/sqrt(_x) and is refined by one Newton-Raphson step:
  *   y <- y * (1.5 - (0.5 * _x) * y * y)
  *
  * Lanes whose value lies in [0, FLT_MIN) (that is +0, -0 and positive denormals) get
  * their estimate forced to 0 before the refinement. Without this, a zero input gives an
  * infinite estimate and the refinement evaluates 0 * inf, so sqrt(0) came out as NaN.
  * With the mask those lanes return _x * 0, i.e. a zero carrying the sign of the input.
  *
  * Negative and NaN lanes are deliberately left unmasked so that they keep producing NaN.
  * NOTE(review): +inf input is not special-cased and still yields NaN (inf * 0 inside the
  * refinement) - confirm whether callers need it.
  */
template<> Packet4f ei_psqrt(Packet4f _x)
{
  const Packet4f half_x = ei_pmul(_x, ei_pset1(.5f));

  // All-ones in lanes where 0 <= _x < FLT_MIN (1.17549435e-38f is the smallest normalized float).
  const Packet4f tiny = _mm_and_ps(_mm_cmpge_ps(_x, _mm_setzero_ps()),
                                   _mm_cmplt_ps(_x, _mm_set1_ps(1.17549435e-38f)));

  // Approximate 1/sqrt(_x); zero the estimate in the "tiny" lanes ((~tiny) & estimate).
  Packet4f y = _mm_andnot_ps(tiny, _mm_rsqrt_ps(_x));

  // One Newton-Raphson iteration to bring the estimate close to full float precision.
  y = ei_pmul(y, ei_psub(ei_pset1(1.5f), ei_pmul(half_x, ei_pmul(y,y))));

  // sqrt(_x) = _x * (1/sqrt(_x))
  return ei_pmul(_x, y);
}
|
||||
|
||||
#endif // EIGEN_MATH_FUNCTIONS_SSE_H
|
@ -61,7 +61,8 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
|
||||
HasSin = 1,
|
||||
HasCos = 1,
|
||||
HasLog = 1,
|
||||
HasExp = 1
|
||||
HasExp = 1,
|
||||
HasSqrt = 1
|
||||
};
|
||||
};
|
||||
template<> struct ei_packet_traits<double> : ei_default_packet_traits
|
||||
|
@ -227,6 +227,7 @@ template<typename Scalar> void packetmath_real()
|
||||
data2[i] = ei_random<Scalar>(0,1e6);
|
||||
}
|
||||
CHECK_CWISE1_IF(ei_packet_traits<Scalar>::HasLog, ei_log, ei_plog);
|
||||
CHECK_CWISE1_IF(ei_packet_traits<Scalar>::HasSqrt, ei_sqrt, ei_psqrt);
|
||||
}
|
||||
|
||||
void test_packetmath()
|
||||
|
Loading…
Reference in New Issue
Block a user