mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-06 14:14:46 +08:00
Remove packet ops pinsertfirst and pinsertlast that are only used in a single place, and can be replaced by other ops when constructing the first/final packet in linspaced_op_impl::packetOp.
I cannot measure any performance changes for SSE, AVX, or AVX512.

name                      old time/op  new time/op  delta
BM_LinSpace<float>/1      1.63ns ± 0%  1.63ns ± 0%  ~  (p=0.762 n=5+5)
BM_LinSpace<float>/8      4.92ns ± 3%  4.89ns ± 3%  ~  (p=0.421 n=5+5)
BM_LinSpace<float>/64     34.6ns ± 0%  34.6ns ± 0%  ~  (p=0.841 n=5+5)
BM_LinSpace<float>/512     217ns ± 0%   217ns ± 0%  ~  (p=0.421 n=5+5)
BM_LinSpace<float>/4k     1.68µs ± 0%  1.68µs ± 0%  ~  (p=1.000 n=5+5)
BM_LinSpace<float>/32k    13.3µs ± 0%  13.3µs ± 0%  ~  (p=0.905 n=5+4)
BM_LinSpace<float>/256k    107µs ± 0%   107µs ± 0%  ~  (p=0.841 n=5+5)
BM_LinSpace<float>/1M      427µs ± 0%   427µs ± 0%  ~  (p=0.690 n=5+5)
This commit is contained in:
parent
5c4e19fbe7
commit
c1d944dd91
@ -727,34 +727,6 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
|
|||||||
return ifPacket.select[0] ? thenPacket : elsePacket;
|
return ifPacket.select[0] ? thenPacket : elsePacket;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \internal \returns \a a with the first coefficient replaced by the scalar b */
|
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
||||||
pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
|
|
||||||
{
|
|
||||||
// Default implementation based on pblend.
|
|
||||||
// It must be specialized for higher performance.
|
|
||||||
Selector<unpacket_traits<Packet>::size> mask;
|
|
||||||
mask.select[0] = true;
|
|
||||||
// This for loop should be optimized away by the compiler.
|
|
||||||
for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
|
|
||||||
mask.select[i] = false;
|
|
||||||
return pblend(mask, pset1<Packet>(b), a);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \internal \returns \a a with the last coefficient replaced by the scalar b */
|
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
|
||||||
pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
|
|
||||||
{
|
|
||||||
// Default implementation based on pblend.
|
|
||||||
// It must be specialized for higher performance.
|
|
||||||
Selector<unpacket_traits<Packet>::size> mask;
|
|
||||||
// This for loop should be optimized away by the compiler.
|
|
||||||
for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
|
|
||||||
mask.select[i] = false;
|
|
||||||
mask.select[unpacket_traits<Packet>::size-1] = true;
|
|
||||||
return pblend(mask, pset1<Packet>(b), a);
|
|
||||||
}
|
|
||||||
|
|
||||||
/***************************************************************************
|
/***************************************************************************
|
||||||
* Some generic implementations to be used by implementors
|
* Some generic implementations to be used by implementors
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
@ -402,26 +402,6 @@ ptranspose(PacketBlock<Packet2cd,2>& kernel) {
|
|||||||
kernel.packet[0].v = tmp;
|
kernel.packet[0].v = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
|
|
||||||
{
|
|
||||||
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
|
|
||||||
{
|
|
||||||
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
|
|
||||||
{
|
|
||||||
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
|
|
||||||
{
|
|
||||||
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
|
|
||||||
}
|
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -763,27 +763,6 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons
|
|||||||
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
|
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
|
|
||||||
{
|
|
||||||
return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
|
|
||||||
{
|
|
||||||
return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
|
|
||||||
{
|
|
||||||
return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
|
|
||||||
{
|
|
||||||
return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Packet math for Eigen::half
|
// Packet math for Eigen::half
|
||||||
template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet8h half; };
|
template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet8h half; };
|
||||||
|
|
||||||
@ -981,16 +960,6 @@ template<> EIGEN_STRONG_INLINE Packet8h preverse(const Packet8h& a)
|
|||||||
return _mm_shuffle_epi8(a,m);
|
return _mm_shuffle_epi8(a,m);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8h pinsertfirst(const Packet8h& a, Eigen::half b)
|
|
||||||
{
|
|
||||||
return _mm_insert_epi16(a,int(b.x),0);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8h pinsertlast(const Packet8h& a, Eigen::half b)
|
|
||||||
{
|
|
||||||
return _mm_insert_epi16(a,int(b.x),7);
|
|
||||||
}
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void
|
EIGEN_STRONG_INLINE void
|
||||||
ptranspose(PacketBlock<Packet8h,8>& kernel) {
|
ptranspose(PacketBlock<Packet8h,8>& kernel) {
|
||||||
__m128i a = kernel.packet[0];
|
__m128i a = kernel.packet[0];
|
||||||
|
@ -440,30 +440,6 @@ ptranspose(PacketBlock<Packet4cd,4>& kernel) {
|
|||||||
kernel.packet[0] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, EIGEN_SSE_SHUFFLE_MASK(0,2,0,2))); // [a0 b0 c0 d0]
|
kernel.packet[0] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, EIGEN_SSE_SHUFFLE_MASK(0,2,0,2))); // [a0 b0 c0 d0]
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf pinsertfirst(const Packet8cf& a, std::complex<float> b)
|
|
||||||
{
|
|
||||||
Packet2cf tmp = Packet2cf(_mm512_extractf32x4_ps(a.v,0));
|
|
||||||
tmp = pinsertfirst(tmp, b);
|
|
||||||
return Packet8cf( _mm512_insertf32x4(a.v, tmp.v, 0) );
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd pinsertfirst(const Packet4cd& a, std::complex<double> b)
|
|
||||||
{
|
|
||||||
return Packet4cd(_mm512_castsi512_pd( _mm512_inserti32x4(_mm512_castpd_si512(a.v), _mm_castpd_si128(pset1<Packet1cd>(b).v), 0) ));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8cf pinsertlast(const Packet8cf& a, std::complex<float> b)
|
|
||||||
{
|
|
||||||
Packet2cf tmp = Packet2cf(_mm512_extractf32x4_ps(a.v,3) );
|
|
||||||
tmp = pinsertlast(tmp, b);
|
|
||||||
return Packet8cf( _mm512_insertf32x4(a.v, tmp.v, 3) );
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd pinsertlast(const Packet4cd& a, std::complex<double> b)
|
|
||||||
{
|
|
||||||
return Packet4cd(_mm512_castsi512_pd( _mm512_inserti32x4(_mm512_castpd_si512(a.v), _mm_castpd_si128(pset1<Packet1cd>(b).v), 3) ));
|
|
||||||
}
|
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -1194,26 +1194,6 @@ EIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& ifPacket,
|
|||||||
return _mm512_mask_blend_pd(m, elsePacket, thenPacket);
|
return _mm512_mask_blend_pd(m, elsePacket, thenPacket);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet16f pinsertfirst(const Packet16f& a, float b)
|
|
||||||
{
|
|
||||||
return _mm512_mask_broadcastss_ps(a, (1), _mm_load_ss(&b));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8d pinsertfirst(const Packet8d& a, double b)
|
|
||||||
{
|
|
||||||
return _mm512_mask_broadcastsd_pd(a, (1), _mm_load_sd(&b));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet16f pinsertlast(const Packet16f& a, float b)
|
|
||||||
{
|
|
||||||
return _mm512_mask_broadcastss_ps(a, (1<<15), _mm_load_ss(&b));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8d pinsertlast(const Packet8d& a, double b)
|
|
||||||
{
|
|
||||||
return _mm512_mask_broadcastsd_pd(a, (1<<7), _mm_load_sd(&b));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet16f, Packet16i>(const Packet16f& a) {
|
template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet16f, Packet16i>(const Packet16f& a) {
|
||||||
return _mm512_cvttps_epi32(a);
|
return _mm512_cvttps_epi32(a);
|
||||||
}
|
}
|
||||||
@ -1432,16 +1412,6 @@ template<> EIGEN_STRONG_INLINE Packet16h preverse(const Packet16h& a)
|
|||||||
_mm_shuffle_epi8(_mm256_extractf128_si256(a,0),m), 1);
|
_mm_shuffle_epi8(_mm256_extractf128_si256(a,0),m), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet16h pinsertfirst(const Packet16h& a, Eigen::half b)
|
|
||||||
{
|
|
||||||
return _mm256_insert_epi16(a,b.x,0);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet16h pinsertlast(const Packet16h& a, Eigen::half b)
|
|
||||||
{
|
|
||||||
return _mm256_insert_epi16(a,b.x,15);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet16h pgather<Eigen::half, Packet16h>(const Eigen::half* from, Index stride)
|
template<> EIGEN_STRONG_INLINE Packet16h pgather<Eigen::half, Packet16h>(const Eigen::half* from, Index stride)
|
||||||
{
|
{
|
||||||
return _mm256_set_epi16(
|
return _mm256_set_epi16(
|
||||||
|
@ -3074,49 +3074,6 @@ template<> EIGEN_DEVICE_FUNC inline Packet2l pselect(const Packet2l& mask, const
|
|||||||
template<> EIGEN_DEVICE_FUNC inline Packet2ul pselect(const Packet2ul& mask, const Packet2ul& a, const Packet2ul& b)
|
template<> EIGEN_DEVICE_FUNC inline Packet2ul pselect(const Packet2ul& mask, const Packet2ul& a, const Packet2ul& b)
|
||||||
{ return vbslq_u64(mask, a, b); }
|
{ return vbslq_u64(mask, a, b); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2f pinsertfirst(const Packet2f& a, float b) { return vset_lane_f32(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4f pinsertfirst(const Packet4f& a, float b) { return vsetq_lane_f32(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4c pinsertfirst(const Packet4c& a, int8_t b)
|
|
||||||
{
|
|
||||||
return static_cast<int32_t>((static_cast<uint32_t>(a) & 0xffffff00u) |
|
|
||||||
(static_cast<uint32_t>(b) & 0xffu));
|
|
||||||
}
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet8c pinsertfirst(const Packet8c& a, int8_t b) { return vset_lane_s8(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet16c pinsertfirst(const Packet16c& a, int8_t b) { return vsetq_lane_s8(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4uc pinsertfirst(const Packet4uc& a, uint8_t b) { return (a & ~0xffu) | b; }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet8uc pinsertfirst(const Packet8uc& a, uint8_t b) { return vset_lane_u8(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet16uc pinsertfirst(const Packet16uc& a, uint8_t b) { return vsetq_lane_u8(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4s pinsertfirst(const Packet4s& a, int16_t b) { return vset_lane_s16(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet8s pinsertfirst(const Packet8s& a, int16_t b) { return vsetq_lane_s16(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4us pinsertfirst(const Packet4us& a, uint16_t b) { return vset_lane_u16(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet8us pinsertfirst(const Packet8us& a, uint16_t b) { return vsetq_lane_u16(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2i pinsertfirst(const Packet2i& a, int32_t b) { return vset_lane_s32(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4i pinsertfirst(const Packet4i& a, int32_t b) { return vsetq_lane_s32(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2ui pinsertfirst(const Packet2ui& a, uint32_t b) { return vset_lane_u32(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4ui pinsertfirst(const Packet4ui& a, uint32_t b) { return vsetq_lane_u32(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2l pinsertfirst(const Packet2l& a, int64_t b) { return vsetq_lane_s64(b, a, 0); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2ul pinsertfirst(const Packet2ul& a, uint64_t b) { return vsetq_lane_u64(b, a, 0); }
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2f pinsertlast(const Packet2f& a, float b) { return vset_lane_f32(b, a, 1); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4f pinsertlast(const Packet4f& a, float b) { return vsetq_lane_f32(b, a, 3); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4c pinsertlast(const Packet4c& a, int8_t b)
|
|
||||||
{ return (static_cast<uint32_t>(a) & 0x00ffffffu) | (static_cast<uint32_t>(b) << 24); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet8c pinsertlast(const Packet8c& a, int8_t b) { return vset_lane_s8(b, a, 7); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet16c pinsertlast(const Packet16c& a, int8_t b) { return vsetq_lane_s8(b, a, 15); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4uc pinsertlast(const Packet4uc& a, uint8_t b) { return (a & ~0xff000000u) | (b << 24); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet8uc pinsertlast(const Packet8uc& a, uint8_t b) { return vset_lane_u8(b, a, 7); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet16uc pinsertlast(const Packet16uc& a, uint8_t b) { return vsetq_lane_u8(b, a, 15); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4s pinsertlast(const Packet4s& a, int16_t b) { return vset_lane_s16(b, a, 3); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet8s pinsertlast(const Packet8s& a, int16_t b) { return vsetq_lane_s16(b, a, 7); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4us pinsertlast(const Packet4us& a, uint16_t b) { return vset_lane_u16(b, a, 3); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet8us pinsertlast(const Packet8us& a, uint16_t b) { return vsetq_lane_u16(b, a, 7); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2i pinsertlast(const Packet2i& a, int32_t b) { return vset_lane_s32(b, a, 1); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4i pinsertlast(const Packet4i& a, int32_t b) { return vsetq_lane_s32(b, a, 3); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2ui pinsertlast(const Packet2ui& a, uint32_t b) { return vset_lane_u32(b, a, 1); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet4ui pinsertlast(const Packet4ui& a, uint32_t b) { return vsetq_lane_u32(b, a, 3); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2l pinsertlast(const Packet2l& a, int64_t b) { return vsetq_lane_s64(b, a, 1); }
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2ul pinsertlast(const Packet2ul& a, uint64_t b) { return vsetq_lane_u64(b, a, 1); }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the integer square root
|
* Computes the integer square root
|
||||||
* @remarks The calculation is performed using an algorithm which iterates through each binary digit of the result
|
* @remarks The calculation is performed using an algorithm which iterates through each binary digit of the result
|
||||||
@ -3436,10 +3393,6 @@ ptranspose(PacketBlock<Packet2d, 2>& kernel)
|
|||||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pselect( const Packet2d& mask, const Packet2d& a, const Packet2d& b)
|
template<> EIGEN_DEVICE_FUNC inline Packet2d pselect( const Packet2d& mask, const Packet2d& a, const Packet2d& b)
|
||||||
{ return vbslq_f64(vreinterpretq_u64_f64(mask), a, b); }
|
{ return vbslq_f64(vreinterpretq_u64_f64(mask), a, b); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2d pinsertfirst(const Packet2d& a, double b) { return vsetq_lane_f64(b, a, 0); }
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet2d pinsertlast(const Packet2d& a, double b) { return vsetq_lane_f64(b, a, 1); }
|
|
||||||
|
|
||||||
#endif // EIGEN_ARCH_ARM64
|
#endif // EIGEN_ARCH_ARM64
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
@ -429,26 +429,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, co
|
|||||||
return Packet2cf(_mm_castpd_ps(result));
|
return Packet2cf(_mm_castpd_ps(result));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
|
|
||||||
{
|
|
||||||
return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
|
|
||||||
{
|
|
||||||
return pset1<Packet1cd>(b);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
|
|
||||||
{
|
|
||||||
return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
|
|
||||||
{
|
|
||||||
return pset1<Packet1cd>(b);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -936,44 +936,6 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
|
|
||||||
{
|
|
||||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
||||||
return _mm_blend_ps(a,pset1<Packet4f>(b),1);
|
|
||||||
#else
|
|
||||||
return _mm_move_ss(a, _mm_load_ss(&b));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
|
|
||||||
{
|
|
||||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
||||||
return _mm_blend_pd(a,pset1<Packet2d>(b),1);
|
|
||||||
#else
|
|
||||||
return _mm_move_sd(a, _mm_load_sd(&b));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
|
|
||||||
{
|
|
||||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
||||||
return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
|
|
||||||
#else
|
|
||||||
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
|
|
||||||
return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
|
|
||||||
{
|
|
||||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
||||||
return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
|
|
||||||
#else
|
|
||||||
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
|
|
||||||
return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
|
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
|
||||||
#ifdef EIGEN_VECTORIZE_FMA
|
#ifdef EIGEN_VECTORIZE_FMA
|
||||||
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
|
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
|
||||||
|
@ -66,17 +66,17 @@ struct linspaced_op_impl<Scalar,/*IsInteger*/false>
|
|||||||
{
|
{
|
||||||
Packet pi = plset<Packet>(Scalar(i-m_size1));
|
Packet pi = plset<Packet>(Scalar(i-m_size1));
|
||||||
Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
|
Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
|
||||||
if(i==0)
|
if (EIGEN_PREDICT_TRUE(i != 0)) return res;
|
||||||
res = pinsertfirst(res, m_low);
|
Packet mask = pcmp_lt(pset1<Packet>(0), plset<Packet>(0));
|
||||||
return res;
|
return pselect<Packet>(mask, res, pset1<Packet>(m_low));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Packet pi = plset<Packet>(Scalar(i));
|
Packet pi = plset<Packet>(Scalar(i));
|
||||||
Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
|
Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
|
||||||
if(i==m_size1-unpacket_traits<Packet>::size+1)
|
if(EIGEN_PREDICT_TRUE(i != m_size1-unpacket_traits<Packet>::size+1)) return res;
|
||||||
res = pinsertlast(res, m_high);
|
Packet mask = pcmp_lt(plset<Packet>(0), pset1<Packet>(unpacket_traits<Packet>::size-1));
|
||||||
return res;
|
return pselect<Packet>(mask, res, pset1<Packet>(m_high));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -288,26 +288,6 @@ template<typename Scalar,typename Packet> void packetmath()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PacketTraits::HasInsert || g_vectorize_sse) {
|
|
||||||
// pinsertfirst
|
|
||||||
for (int i=0; i<PacketSize; ++i)
|
|
||||||
ref[i] = data1[i];
|
|
||||||
Scalar s = internal::random<Scalar>();
|
|
||||||
ref[0] = s;
|
|
||||||
internal::pstore(data2, internal::pinsertfirst(internal::pload<Packet>(data1),s));
|
|
||||||
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::pinsertfirst");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (PacketTraits::HasInsert || g_vectorize_sse) {
|
|
||||||
// pinsertlast
|
|
||||||
for (int i=0; i<PacketSize; ++i)
|
|
||||||
ref[i] = data1[i];
|
|
||||||
Scalar s = internal::random<Scalar>();
|
|
||||||
ref[PacketSize-1] = s;
|
|
||||||
internal::pstore(data2, internal::pinsertlast(internal::pload<Packet>(data1),s));
|
|
||||||
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::pinsertlast");
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
{
|
||||||
for (int i = 0; i < PacketSize; ++i) {
|
for (int i = 0; i < PacketSize; ++i) {
|
||||||
// "if" mask
|
// "if" mask
|
||||||
|
@ -16,12 +16,6 @@
|
|||||||
#endif
|
#endif
|
||||||
// using namespace Eigen;
|
// using namespace Eigen;
|
||||||
|
|
||||||
#ifdef EIGEN_VECTORIZE_SSE
|
|
||||||
const bool g_vectorize_sse = true;
|
|
||||||
#else
|
|
||||||
const bool g_vectorize_sse = false;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
bool g_first_pass = true;
|
bool g_first_pass = true;
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
|
Loading…
Reference in New Issue
Block a user