add vectorization of sqrt for float

2025-03-01 18:26:24 +08:00 · 2009-03-27 14:41:46 +00:00 · 2009-03-27 14:41:46 +00:00 · 49fc1e3e84
commit 49fc1e3e84
parent 3499f6eccd
6 changed files with 29 additions and 12 deletions
--- a/Eigen/Core
+++ b/Eigen/Core
@ -102,7 +102,7 @@ namespace Eigen {

 #if defined EIGEN_VECTORIZE_SSE
  #include "src/Core/arch/SSE/PacketMath.h"
-  #include "src/Core/arch/SSE/TranscendentalFunctions.h"
+  #include "src/Core/arch/SSE/MathFunctions.h"
 #elif defined EIGEN_VECTORIZE_ALTIVEC
  #include "src/Core/arch/AltiVec/PacketMath.h"
 #endif
--- a/Eigen/src/Array/Functors.h
+++ b/Eigen/src/Array/Functors.h
@ -58,10 +58,16 @@ struct ei_functor_traits<ei_scalar_add_op<Scalar> >
  */
 template<typename Scalar> struct ei_scalar_sqrt_op EIGEN_EMPTY_STRUCT {
  inline const Scalar operator() (const Scalar& a) const { return ei_sqrt(a); }
+  typedef typename ei_packet_traits<Scalar>::type Packet; // packet type that ei_packet_traits associates with Scalar
+  inline Packet packetOp(const Packet& a) const { return ei_psqrt(a); } // packet counterpart of operator(): forwards to ei_psqrt
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_sqrt_op<Scalar> >
-{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
+{ enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = ei_packet_traits<Scalar>::HasSqrt // packet access is on only when the packet traits report HasSqrt
+  };
+};

 /** \internal
  *
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@ -46,6 +46,7 @@ struct ei_default_packet_traits
    HasMax    = 1,
    
    HasDiv    = 0,
+    HasSqrt   = 0,
    HasExp    = 0,
    HasLog    = 0,
    HasPow    = 0,
@ -192,6 +193,9 @@ template<typename Packet> inline Packet ei_pexp(Packet a) { return ei_exp(a); }
 /** \internal \returns the log of \a a (coeff-wise) */
 template<typename Packet> inline Packet ei_plog(Packet a) { return ei_log(a); }

+/** \internal \returns the square-root of \a a (coeff-wise) */
+template<typename Packet> inline Packet ei_psqrt(Packet a) { return ei_sqrt(a); }
+
 /***************************************************************************
 * The following functions might not have to be overwritten for vectorized types
 ***************************************************************************/
--- a/Eigen/src/Core/arch/SSE/TranscendentalFunctions.h
+++ b/Eigen/src/Core/arch/SSE/TranscendentalFunctions.h
@ -23,9 +23,9 @@
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.

-/* The functions of this file come from Julien Pommier's sse math library.
- * which is itself inspired by Intel Approximate Math library, and based on the
- * corresponding algorithms of the cephes math library.
+/* The sin, cos, exp, and log functions of this file come from Julien Pommier's sse
+ * math library, which is itself inspired by Intel Approximate Math library,
+ * and based on the corresponding algorithms of the cephes math library.
 */

 /* Copyright (C) 2007  Julien Pommier
@ -49,18 +49,16 @@
  (this is the zlib license)
 */

-#ifndef EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H
-#define EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H
+#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
+#define EIGEN_MATH_FUNCTIONS_SSE_H

 _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0);
 _EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
 /* the smallest non denormalized float number */
 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos,  0x00800000);
-// _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(mant_mask, 0x7f800000);
 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);

 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
-// _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_sign_mask, ~0x80000000);

 _EIGEN_DECLARE_CONST_Packet4i(1, 1);
 _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
@ -214,7 +212,6 @@ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0,  2.443315711809948E-005);
 _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003);
 _EIGEN_DECLARE_CONST_Packet4f(coscof_p2,  4.166664568298827E-002);
 _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI
-_EIGEN_DECLARE_CONST_Packet4f(2pi, 2.*M_PI);

 template<> EIGEN_DONT_INLINE Packet4f ei_psin(Packet4f x)
 {
@ -358,4 +355,12 @@ template<> Packet4f ei_pcos(Packet4f x)
  return _mm_xor_ps(y, sign_bit);
 }

-#endif // EIGEN_TRANSCENDENTAL_FUNCTIONS_SSE_H
+template<> Packet4f ei_psqrt(Packet4f _x) // SSE float sqrt computed as _x * rsqrt(_x), with one Newton-Raphson refinement of the rsqrt estimate
+{
+  Packet4f half = ei_pmul(_x, ei_pset1(.5f)); // 0.5 * _x, used by the refinement step below
+  Packet4f x = _mm_and_ps(_mm_cmpneq_ps(_x, _mm_setzero_ps()), _mm_rsqrt_ps(_x)); // zero the estimate where _x == 0: rsqrt(0) is +inf and 0*inf would turn sqrt(0) into NaN
+  x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x)))); // refinement: x <- x * (1.5 - 0.5 * _x * x*x)
+  return ei_pmul(_x,x); // sqrt(_x) = _x * (1/sqrt(_x)); lanes masked above yield 0
+}
+
+#endif // EIGEN_MATH_FUNCTIONS_SSE_H
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@ -61,7 +61,8 @@ template<> struct ei_packet_traits<float>  : ei_default_packet_traits
    HasSin  = 1,
    HasCos  = 1,
    HasLog  = 1,
-    HasExp  = 1
+    HasExp  = 1,
+    HasSqrt = 1
  };
 };
 template<> struct ei_packet_traits<double> : ei_default_packet_traits
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@ -227,6 +227,7 @@ template<typename Scalar> void packetmath_real()
    data2[i] = ei_random<Scalar>(0,1e6);
  }
  CHECK_CWISE1_IF(ei_packet_traits<Scalar>::HasLog, ei_log, ei_plog);
+  CHECK_CWISE1_IF(ei_packet_traits<Scalar>::HasSqrt, ei_sqrt, ei_psqrt);
 }

 void test_packetmath()