mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-24 14:45:14 +08:00
Add pinsertfirst function and implement pinsertlast for complex on SSE/AVX.
This commit is contained in:
parent
e44519744e
commit
598de8b193
@ -558,7 +558,21 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
|
|||||||
return ifPacket.select[0] ? thenPacket : elsePacket;
|
return ifPacket.select[0] ? thenPacket : elsePacket;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \internal \returns \a a with last coefficients replaced by the scalar b */
|
/** \internal \returns \a a with the first coefficient replaced by the scalar b */
|
||||||
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
|
pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
|
||||||
|
{
|
||||||
|
// Default implementation based on pblend.
|
||||||
|
// It must be specialized for higher performance.
|
||||||
|
Selector<unpacket_traits<Packet>::size> mask;
|
||||||
|
mask.select[0] = true;
|
||||||
|
// This for loop should be optimized away by the compiler.
|
||||||
|
for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
|
||||||
|
mask.select[i] = false;
|
||||||
|
return pblend(mask, pset1<Packet>(b), a);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \internal \returns \a a with the last coefficient replaced by the scalar b */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
|
pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
|
||||||
{
|
{
|
||||||
|
@ -456,6 +456,26 @@ ptranspose(PacketBlock<Packet2cd,2>& kernel) {
|
|||||||
kernel.packet[0].v = tmp;
|
kernel.packet[0].v = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \internal AVX specialization: \returns \a a with its first complex coefficient replaced by \a b */
template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
{
  // Blend-immediate bits 0 and 1 take the two lowest float lanes (the first
  // complex coefficient) from the broadcast of b; all other lanes come from a.
  const Packet4cf broadcast = pset1<Packet4cf>(b);
  return Packet4cf(_mm256_blend_ps(a.v, broadcast.v, 0x03));
}
|
||||||
|
|
||||||
|
/** \internal AVX specialization: \returns \a a with its first complex coefficient replaced by \a b */
template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
{
  // Blend-immediate bits 0 and 1 take the two lowest double lanes (the first
  // complex coefficient) from the broadcast of b; the upper lanes come from a.
  const Packet2cd broadcast = pset1<Packet2cd>(b);
  return Packet2cd(_mm256_blend_pd(a.v, broadcast.v, 0x3));
}
|
||||||
|
|
||||||
|
/** \internal AVX specialization: \returns \a a with its last complex coefficient replaced by \a b */
template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
{
  // 0xC0 == (1<<7)|(1<<6): the two highest float lanes (the last complex
  // coefficient) are taken from the broadcast of b, the rest from a.
  const Packet4cf broadcast = pset1<Packet4cf>(b);
  return Packet4cf(_mm256_blend_ps(a.v, broadcast.v, 0xC0));
}
|
||||||
|
|
||||||
|
/** \internal AVX specialization: \returns \a a with its last complex coefficient replaced by \a b */
template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
{
  // 0xC == (1<<3)|(1<<2): the two highest double lanes (the last complex
  // coefficient) are taken from the broadcast of b, the rest from a.
  const Packet2cd broadcast = pset1<Packet2cd>(b);
  return Packet2cd(_mm256_blend_pd(a.v, broadcast.v, 0xC));
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -604,6 +604,16 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons
|
|||||||
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
|
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \internal AVX specialization: \returns \a a with its first float coefficient replaced by \a b */
template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
{
  // Blend-immediate bit 0 takes lane 0 from the broadcast of b; the remaining
  // lanes come from a.
  const Packet8f broadcast = pset1<Packet8f>(b);
  return _mm256_blend_ps(a, broadcast, 0x1);
}
|
||||||
|
|
||||||
|
/** \internal AVX specialization: \returns \a a with its first double coefficient replaced by \a b */
template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
{
  // Blend-immediate bit 0 takes lane 0 from the broadcast of b; the remaining
  // lanes come from a.
  const Packet4d broadcast = pset1<Packet4d>(b);
  return _mm256_blend_pd(a, broadcast, 0x1);
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
|
template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
|
||||||
{
|
{
|
||||||
return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
|
return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
|
||||||
|
@ -476,6 +476,26 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, co
|
|||||||
return Packet2cf(_mm_castpd_ps(result));
|
return Packet2cf(_mm_castpd_ps(result));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \internal SSE specialization: \returns \a a with its first complex coefficient replaced by \a b */
template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
{
  // b is a by-value copy, so its address is valid for the duration of the call.
  // Its 64 bits are loaded into the lower half of the result; the upper half
  // is carried over from a.
  const __m64* lowBits = reinterpret_cast<const __m64*>(&b);
  return Packet2cf(_mm_loadl_pi(a.v, lowBits));
}
|
||||||
|
|
||||||
|
/** \internal SSE specialization: the input packet is ignored and the result is built from \a b alone */
template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd& /*a*/, std::complex<double> b)
{
  // Presumably Packet1cd holds a single coefficient, so replacing the first
  // one amounts to building a fresh packet from b.
  const Packet1cd replaced = pset1<Packet1cd>(b);
  return replaced;
}
|
||||||
|
|
||||||
|
/** \internal SSE specialization: \returns \a a with its last complex coefficient replaced by \a b */
template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
{
  // b is a by-value copy, so its address is valid for the duration of the call.
  // Its 64 bits are loaded into the upper half of the result; the lower half
  // is carried over from a.
  const __m64* highBits = reinterpret_cast<const __m64*>(&b);
  return Packet2cf(_mm_loadh_pi(a.v, highBits));
}
|
||||||
|
|
||||||
|
/** \internal SSE specialization: the input packet is ignored and the result is built from \a b alone */
template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd& /*a*/, std::complex<double> b)
{
  // Presumably Packet1cd holds a single coefficient, so replacing the last
  // one amounts to building a fresh packet from b.
  const Packet1cd replaced = pset1<Packet1cd>(b);
  return replaced;
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -818,6 +818,24 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \internal SSE specialization: \returns \a a with its first float coefficient replaced by \a b */
template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
{
#ifndef EIGEN_VECTORIZE_SSE4_1
  // Without SSE4.1 blends: load b into the lowest lane of a temporary, then
  // move that single lane into a copy of a.
  const __m128 lowLane = _mm_load_ss(&b);
  return _mm_move_ss(a, lowLane);
#else
  // With SSE4.1: blend-immediate bit 0 takes lane 0 from the broadcast of b.
  const Packet4f broadcast = pset1<Packet4f>(b);
  return _mm_blend_ps(a, broadcast, 0x1);
#endif
}
|
||||||
|
|
||||||
|
/** \internal SSE specialization: \returns \a a with its first double coefficient replaced by \a b */
template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
{
#ifndef EIGEN_VECTORIZE_SSE4_1
  // Without SSE4.1 blends: load b into the lowest lane of a temporary, then
  // move that single lane into a copy of a.
  const __m128d lowLane = _mm_load_sd(&b);
  return _mm_move_sd(a, lowLane);
#else
  // With SSE4.1: blend-immediate bit 0 takes lane 0 from the broadcast of b.
  const Packet2d broadcast = pset1<Packet2d>(b);
  return _mm_blend_pd(a, broadcast, 0x1);
#endif
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
|
template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
|
||||||
{
|
{
|
||||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||||
|
@ -16,6 +16,12 @@
|
|||||||
#endif
|
#endif
|
||||||
// using namespace Eigen;
|
// using namespace Eigen;
|
||||||
|
|
||||||
|
// Compile-time flag mirroring EIGEN_VECTORIZE_SSE: true when that macro is
// defined, false otherwise.
#ifndef EIGEN_VECTORIZE_SSE
const bool g_vectorize_sse = false;
#else
const bool g_vectorize_sse = true;
#endif
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
/** \returns the result of applying unary minus to \a x, converted to T */
template<typename T>
T negate(const T& x)
{
  return -x;
}
|
/** \returns the result of applying unary minus to \a x, converted to T */
template<typename T>
T negate(const T& x)
{
  return -x;
}
|
||||||
@ -290,7 +296,17 @@ template<typename Scalar> void packetmath()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PacketTraits::HasBlend) {
|
if (PacketTraits::HasBlend || g_vectorize_sse) {
|
||||||
|
// pinsertfirst
|
||||||
|
for (int i=0; i<PacketSize; ++i)
|
||||||
|
ref[i] = data1[i];
|
||||||
|
Scalar s = internal::random<Scalar>();
|
||||||
|
ref[0] = s;
|
||||||
|
internal::pstore(data2, internal::pinsertfirst(internal::pload<Packet>(data1),s));
|
||||||
|
VERIFY(areApprox(ref, data2, PacketSize) && "internal::pinsertfirst");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PacketTraits::HasBlend || g_vectorize_sse) {
|
||||||
// pinsertlast
|
// pinsertlast
|
||||||
for (int i=0; i<PacketSize; ++i)
|
for (int i=0; i<PacketSize; ++i)
|
||||||
ref[i] = data1[i];
|
ref[i] = data1[i];
|
||||||
|
Loading…
Reference in New Issue
Block a user