add vectorization of sqrt for float

This commit is contained in:
Gael Guennebaud 2009-03-27 14:41:46 +00:00
parent 3499f6eccd
commit 49fc1e3e84
6 changed files with 29 additions and 12 deletions

View File

@ -102,7 +102,7 @@ namespace Eigen {
#if defined EIGEN_VECTORIZE_SSE
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/TranscendentalFunctions.h"
#include "src/Core/arch/SSE/MathFunctions.h"
#elif defined EIGEN_VECTORIZE_ALTIVEC
#include "src/Core/arch/AltiVec/PacketMath.h"
#endif

View File

@ -58,10 +58,16 @@ struct ei_functor_traits<ei_scalar_add_op<Scalar> >
*/
template<typename Scalar> struct ei_scalar_sqrt_op EIGEN_EMPTY_STRUCT {
inline const Scalar operator() (const Scalar& a) const { return ei_sqrt(a); }
typedef typename ei_packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return ei_psqrt(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_sqrt_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
{ enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = ei_packet_traits<Scalar>::HasSqrt
};
};
/** \internal
*

View File

@ -46,6 +46,7 @@ struct ei_default_packet_traits
HasMax = 1,
HasDiv = 0,
HasSqrt = 0,
HasExp = 0,
HasLog = 0,
HasPow = 0,
@ -192,6 +193,9 @@ template<typename Packet> inline Packet ei_pexp(Packet a) { return ei_exp(a); }
/** \internal \returns the log of \a a (coeff-wise) */
template<typename Packet> inline Packet ei_plog(Packet a) { return ei_log(a); }
/** \internal \returns the square-root of \a a (coeff-wise) */
template<typename Packet> inline Packet ei_psqrt(Packet a) { return ei_log(a); }
/***************************************************************************
* The following functions might not have to be overwritten for vectorized types
***************************************************************************/

View File

@ -23,9 +23,9 @@
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
/* The functions of this file come from Julien Pommier's sse math library.
* which is itself inspired by Intel Approximate Math library, and based on the
* corresponding algorithms of the cephes math library.
/* The sin, cos, exp, and log functions of this file come from Julien Pommier's sse
* math library, which is itself inspired by Intel Approximate Math library,
* and based on the corresponding algorithms of the cephes math library.
*/
/* Copyright (C) 2007 Julien Pommier
@ -49,18 +49,16 @@
(this is the zlib license)
*/
#ifndef EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H
#define EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H
#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
#define EIGEN_MATH_FUNCTIONS_SSE_H
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
/* the smallest non denormalized float number */
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
// _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(mant_mask, 0x7f800000);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
// _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_sign_mask, ~0x80000000);
_EIGEN_DECLARE_CONST_Packet4i(1, 1);
_EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
@ -214,7 +212,6 @@ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002);
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI
_EIGEN_DECLARE_CONST_Packet4f(2pi, 2.*M_PI);
template<> EIGEN_DONT_INLINE Packet4f ei_psin(Packet4f x)
{
@ -358,4 +355,12 @@ template<> Packet4f ei_pcos(Packet4f x)
return _mm_xor_ps(y, sign_bit);
}
#endif // EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H
template<> Packet4f ei_psqrt(Packet4f _x)
{
Packet4f half = ei_pmul(_x, ei_pset1(.5f));
Packet4f x = _mm_rsqrt_ps(_x);
x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x))));
return ei_pmul(_x,x);
}
#endif // EIGEN_MATH_FUNCTIONS_SSE_H

View File

@ -61,7 +61,8 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
HasSin = 1,
HasCos = 1,
HasLog = 1,
HasExp = 1
HasExp = 1,
HasSqrt = 1
};
};
template<> struct ei_packet_traits<double> : ei_default_packet_traits

View File

@ -227,6 +227,7 @@ template<typename Scalar> void packetmath_real()
data2[i] = ei_random<Scalar>(0,1e6);
}
CHECK_CWISE1_IF(ei_packet_traits<Scalar>::HasLog, ei_log, ei_plog);
CHECK_CWISE1_IF(ei_packet_traits<Scalar>::HasSqrt, ei_sqrt, ei_psqrt);
}
void test_packetmath()