From 955c099eb58347c8d333cbd3ae274511f8050f54 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Wed, 23 Feb 2011 18:20:55 +0300
Subject: [PATCH] implement ploaddup for altivec and add respective unit test

---
Review notes (this region is ignored when the patch is applied):
 * Line structure and template argument lists (<Packet2cf>, <float>, ...)
   were restored after an extraction step had stripped them; the exact
   spacing inside context lines could not be recovered -- verify against
   the target tree before applying strictly.
 * NOTE(review): vec_ld ignores the low four bits of the effective address,
   so the Packet4f/Packet4i ploaddup overloads added below presumably
   require `from` to be 16-byte aligned -- TODO confirm against callers.

 Eigen/src/Core/arch/AltiVec/Complex.h    |  5 +++++
 Eigen/src/Core/arch/AltiVec/PacketMath.h | 14 ++++++++++++++
 test/packetmath.cpp                      | 12 ++++++++++--
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 53fb1ba31..fd118ccff 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -112,6 +112,11 @@ template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a,
 template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
 
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
+{
+  return pset1<Packet2cf>(*from);
+}
+
 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
 
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 96cb54283..efb4f62d1 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -73,6 +73,7 @@ static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
 static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 };
 static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
 static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
+static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
 
 static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
 static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
@@ -292,6 +293,19 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
   return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data
 }
 
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+  Packet16uc tmp;
+  tmp = vec_ld(0, (unsigned char *)from); // most significant quadword
+  return (Packet4f) vec_perm(tmp, tmp, p16uc_DUPLICATE);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+  Packet16uc tmp;
+  tmp = vec_ld(0, (unsigned char *)from); // most significant quadword
+  return (Packet4i) vec_perm(tmp, tmp, p16uc_DUPLICATE);
+}
+
 template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
 template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
 
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index c89fd7314..d8a785519 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -126,8 +126,8 @@ template<typename Scalar> void packetmath()
   RealScalar refvalue = 0;
   for (int i=0; i<size; ++i)
   {
-    data1[i] = internal::random<Scalar>()/PacketSize;
-    data2[i] = internal::random<Scalar>()/PacketSize;
+    data1[i] = internal::random<Scalar>()/RealScalar(PacketSize);
+    data2[i] = internal::random<Scalar>()/RealScalar(PacketSize);
     refvalue = std::max(refvalue,internal::abs(data1[i]));
   }
 
@@ -179,6 +179,14 @@ template<typename Scalar> void packetmath()
   VERIFY(areApprox(ref, data2, PacketSize) && "internal::pset1");
 
   VERIFY(internal::isApprox(data1[0], internal::pfirst(internal::pload<Packet>(data1))) && "internal::pfirst");
+
+  if(PacketSize>1)
+  {
+    for(int i=0;i<PacketSize;++i)
+      ref[i] = data1[i/2];
+    internal::pstore(data2, internal::ploaddup<Packet>(data1));
+    VERIFY(areApprox(ref, data2, PacketSize) && "ploaddup");
+  }
 
   ref[0] = 0;
   for (int i=0; i<PacketSize; ++i)