diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 0ed5d2cc5..449793372 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -727,34 +727,6 @@ pblend(const Selector::size>& ifPacket, const Packet& th return ifPacket.select[0] ? thenPacket : elsePacket; } -/** \internal \returns \a a with the first coefficient replaced by the scalar b */ -template EIGEN_DEVICE_FUNC inline Packet -pinsertfirst(const Packet& a, typename unpacket_traits::type b) -{ - // Default implementation based on pblend. - // It must be specialized for higher performance. - Selector::size> mask; - mask.select[0] = true; - // This for loop should be optimized away by the compiler. - for(Index i=1; i::size; ++i) - mask.select[i] = false; - return pblend(mask, pset1(b), a); -} - -/** \internal \returns \a a with the last coefficient replaced by the scalar b */ -template EIGEN_DEVICE_FUNC inline Packet -pinsertlast(const Packet& a, typename unpacket_traits::type b) -{ - // Default implementation based on pblend. - // It must be specialized for higher performance. - Selector::size> mask; - // This for loop should be optimized away by the compiler. - for(Index i=0; i::size-1; ++i) - mask.select[i] = false; - mask.select[unpacket_traits::size-1] = true; - return pblend(mask, pset1(b), a); -} - /*************************************************************************** * Some generic implementations to be used by implementors ***************************************************************************/ diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index 893eb2702..c2d5205f2 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -402,26 +402,6 @@ ptranspose(PacketBlock& kernel) { kernel.packet[0].v = tmp; } -template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex b) -{ - return Packet4cf(_mm256_blend_ps(a.v,pset1(b).v,1|2)); -} - -template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex b) -{ - return Packet2cd(_mm256_blend_pd(a.v,pset1(b).v,1|2)); -} - -template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex b) -{ - return Packet4cf(_mm256_blend_ps(a.v,pset1(b).v,(1<<7)|(1<<6))); -} - -template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex b) -{ - return Packet2cd(_mm256_blend_pd(a.v,pset1(b).v,(1<<3)|(1<<2))); -} - } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 10196fd6d..35a329e3f 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -763,27 +763,6 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons return _mm256_blendv_pd(thenPacket, elsePacket, false_mask); } -template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b) -{ - return _mm256_blend_ps(a,pset1(b),1); -} - -template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b) -{ - return _mm256_blend_pd(a,pset1(b),1); -} - -template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b) -{ - return _mm256_blend_ps(a,pset1(b),(1<<7)); -} - -template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b) -{ - return _mm256_blend_pd(a,pset1(b),(1<<3)); -} - - // Packet math for Eigen::half template<> struct unpacket_traits { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet8h half; }; @@ -981,16 +960,6 @@ template<> EIGEN_STRONG_INLINE Packet8h preverse(const Packet8h& a) return _mm_shuffle_epi8(a,m); } -template<> EIGEN_STRONG_INLINE Packet8h pinsertfirst(const Packet8h& a, Eigen::half b) -{ - return _mm_insert_epi16(a,int(b.x),0); -} - -template<> EIGEN_STRONG_INLINE Packet8h pinsertlast(const Packet8h& a, Eigen::half b) -{ - return _mm_insert_epi16(a,int(b.x),7); -} - EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { __m128i a = kernel.packet[0]; diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h index 75bdf57f1..dc2ae0a35 100644 --- a/Eigen/src/Core/arch/AVX512/Complex.h +++ b/Eigen/src/Core/arch/AVX512/Complex.h @@ -440,30 +440,6 @@ ptranspose(PacketBlock& kernel) { kernel.packet[0] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, EIGEN_SSE_SHUFFLE_MASK(0,2,0,2))); // [a0 b0 c0 d0] } -template<> EIGEN_STRONG_INLINE Packet8cf pinsertfirst(const Packet8cf& a, std::complex b) -{ - Packet2cf tmp = Packet2cf(_mm512_extractf32x4_ps(a.v,0)); - tmp = pinsertfirst(tmp, b); - return Packet8cf( _mm512_insertf32x4(a.v, tmp.v, 0) ); -} - -template<> EIGEN_STRONG_INLINE Packet4cd pinsertfirst(const Packet4cd& a, std::complex b) -{ - return Packet4cd(_mm512_castsi512_pd( _mm512_inserti32x4(_mm512_castpd_si512(a.v), _mm_castpd_si128(pset1(b).v), 0) )); -} - -template<> EIGEN_STRONG_INLINE Packet8cf pinsertlast(const Packet8cf& a, std::complex b) -{ - Packet2cf tmp = Packet2cf(_mm512_extractf32x4_ps(a.v,3) ); - tmp = pinsertlast(tmp, b); - return Packet8cf( _mm512_insertf32x4(a.v, tmp.v, 3) ); -} - -template<> EIGEN_STRONG_INLINE Packet4cd pinsertlast(const Packet4cd& a, std::complex b) -{ - return Packet4cd(_mm512_castsi512_pd( _mm512_inserti32x4(_mm512_castpd_si512(a.v), _mm_castpd_si128(pset1(b).v), 3) )); -} - } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 346d1f06e..10a1d4adb 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -1194,26 +1194,6 @@ EIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& ifPacket, return _mm512_mask_blend_pd(m, elsePacket, thenPacket); } -template<> EIGEN_STRONG_INLINE Packet16f pinsertfirst(const Packet16f& a, float b) -{ - return _mm512_mask_broadcastss_ps(a, (1), _mm_load_ss(&b)); -} - -template<> EIGEN_STRONG_INLINE Packet8d pinsertfirst(const Packet8d& a, double b) -{ - return _mm512_mask_broadcastsd_pd(a, (1), _mm_load_sd(&b)); -} - -template<> EIGEN_STRONG_INLINE Packet16f pinsertlast(const Packet16f& a, float b) -{ - return _mm512_mask_broadcastss_ps(a, (1<<15), _mm_load_ss(&b)); -} - -template<> EIGEN_STRONG_INLINE Packet8d pinsertlast(const Packet8d& a, double b) -{ - return _mm512_mask_broadcastsd_pd(a, (1<<7), _mm_load_sd(&b)); -} - template<> EIGEN_STRONG_INLINE Packet16i pcast(const Packet16f& a) { return _mm512_cvttps_epi32(a); } @@ -1432,16 +1412,6 @@ template<> EIGEN_STRONG_INLINE Packet16h preverse(const Packet16h& a) _mm_shuffle_epi8(_mm256_extractf128_si256(a,0),m), 1); } -template<> EIGEN_STRONG_INLINE Packet16h pinsertfirst(const Packet16h& a, Eigen::half b) -{ - return _mm256_insert_epi16(a,b.x,0); -} - -template<> EIGEN_STRONG_INLINE Packet16h pinsertlast(const Packet16h& a, Eigen::half b) -{ - return _mm256_insert_epi16(a,b.x,15); -} - template<> EIGEN_STRONG_INLINE Packet16h pgather(const Eigen::half* from, Index stride) { return _mm256_set_epi16( diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 5937433f5..ee5a938b9 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -3074,49 +3074,6 @@ template<> EIGEN_DEVICE_FUNC inline Packet2l pselect(const Packet2l& mask, const template<> EIGEN_DEVICE_FUNC inline Packet2ul pselect(const Packet2ul& mask, const Packet2ul& a, const Packet2ul& b) { return vbslq_u64(mask, a, b); } -EIGEN_DEVICE_FUNC inline Packet2f pinsertfirst(const Packet2f& a, float b) { return vset_lane_f32(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet4f pinsertfirst(const Packet4f& a, float b) { return vsetq_lane_f32(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet4c pinsertfirst(const Packet4c& a, int8_t b) -{ - return static_cast((static_cast(a) & 0xffffff00u) | - (static_cast(b) & 0xffu)); -} -EIGEN_DEVICE_FUNC inline Packet8c pinsertfirst(const Packet8c& a, int8_t b) { return vset_lane_s8(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet16c pinsertfirst(const Packet16c& a, int8_t b) { return vsetq_lane_s8(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet4uc pinsertfirst(const Packet4uc& a, uint8_t b) { return (a & ~0xffu) | b; } -EIGEN_DEVICE_FUNC inline Packet8uc pinsertfirst(const Packet8uc& a, uint8_t b) { return vset_lane_u8(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet16uc pinsertfirst(const Packet16uc& a, uint8_t b) { return vsetq_lane_u8(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet4s pinsertfirst(const Packet4s& a, int16_t b) { return vset_lane_s16(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet8s pinsertfirst(const Packet8s& a, int16_t b) { return vsetq_lane_s16(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet4us pinsertfirst(const Packet4us& a, uint16_t b) { return vset_lane_u16(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet8us pinsertfirst(const Packet8us& a, uint16_t b) { return vsetq_lane_u16(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet2i pinsertfirst(const Packet2i& a, int32_t b) { return vset_lane_s32(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet4i pinsertfirst(const Packet4i& a, int32_t b) { return vsetq_lane_s32(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet2ui pinsertfirst(const Packet2ui& a, uint32_t b) { return vset_lane_u32(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet4ui pinsertfirst(const Packet4ui& a, uint32_t b) { return vsetq_lane_u32(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet2l pinsertfirst(const Packet2l& a, int64_t b) { return vsetq_lane_s64(b, a, 0); } -EIGEN_DEVICE_FUNC inline Packet2ul pinsertfirst(const Packet2ul& a, uint64_t b) { return vsetq_lane_u64(b, a, 0); } - -EIGEN_DEVICE_FUNC inline Packet2f pinsertlast(const Packet2f& a, float b) { return vset_lane_f32(b, a, 1); } -EIGEN_DEVICE_FUNC inline Packet4f pinsertlast(const Packet4f& a, float b) { return vsetq_lane_f32(b, a, 3); } -EIGEN_DEVICE_FUNC inline Packet4c pinsertlast(const Packet4c& a, int8_t b) -{ return (static_cast(a) & 0x00ffffffu) | (static_cast(b) << 24); } -EIGEN_DEVICE_FUNC inline Packet8c pinsertlast(const Packet8c& a, int8_t b) { return vset_lane_s8(b, a, 7); } -EIGEN_DEVICE_FUNC inline Packet16c pinsertlast(const Packet16c& a, int8_t b) { return vsetq_lane_s8(b, a, 15); } -EIGEN_DEVICE_FUNC inline Packet4uc pinsertlast(const Packet4uc& a, uint8_t b) { return (a & ~0xff000000u) | (b << 24); } -EIGEN_DEVICE_FUNC inline Packet8uc pinsertlast(const Packet8uc& a, uint8_t b) { return vset_lane_u8(b, a, 7); } -EIGEN_DEVICE_FUNC inline Packet16uc pinsertlast(const Packet16uc& a, uint8_t b) { return vsetq_lane_u8(b, a, 15); } -EIGEN_DEVICE_FUNC inline Packet4s pinsertlast(const Packet4s& a, int16_t b) { return vset_lane_s16(b, a, 3); } -EIGEN_DEVICE_FUNC inline Packet8s pinsertlast(const Packet8s& a, int16_t b) { return vsetq_lane_s16(b, a, 7); } -EIGEN_DEVICE_FUNC inline Packet4us pinsertlast(const Packet4us& a, uint16_t b) { return vset_lane_u16(b, a, 3); } -EIGEN_DEVICE_FUNC inline Packet8us pinsertlast(const Packet8us& a, uint16_t b) { return vsetq_lane_u16(b, a, 7); } -EIGEN_DEVICE_FUNC inline Packet2i pinsertlast(const Packet2i& a, int32_t b) { return vset_lane_s32(b, a, 1); } -EIGEN_DEVICE_FUNC inline Packet4i pinsertlast(const Packet4i& a, int32_t b) { return vsetq_lane_s32(b, a, 3); } -EIGEN_DEVICE_FUNC inline Packet2ui pinsertlast(const Packet2ui& a, uint32_t b) { return vset_lane_u32(b, a, 1); } -EIGEN_DEVICE_FUNC inline Packet4ui pinsertlast(const Packet4ui& a, uint32_t b) { return vsetq_lane_u32(b, a, 3); } -EIGEN_DEVICE_FUNC inline Packet2l pinsertlast(const Packet2l& a, int64_t b) { return vsetq_lane_s64(b, a, 1); } -EIGEN_DEVICE_FUNC inline Packet2ul pinsertlast(const Packet2ul& a, uint64_t b) { return vsetq_lane_u64(b, a, 1); } - /** * Computes the integer square root * @remarks The calculation is performed using an algorithm which iterates through each binary digit of the result @@ -3436,10 +3393,6 @@ ptranspose(PacketBlock& kernel) template<> EIGEN_DEVICE_FUNC inline Packet2d pselect( const Packet2d& mask, const Packet2d& a, const Packet2d& b) { return vbslq_f64(vreinterpretq_u64_f64(mask), a, b); } -EIGEN_DEVICE_FUNC inline Packet2d pinsertfirst(const Packet2d& a, double b) { return vsetq_lane_f64(b, a, 0); } - -EIGEN_DEVICE_FUNC inline Packet2d pinsertlast(const Packet2d& a, double b) { return vsetq_lane_f64(b, a, 1); } - #endif // EIGEN_ARCH_ARM64 } // end namespace internal diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index a16d73e27..8bf8bfe85 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -429,26 +429,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, co return Packet2cf(_mm_castpd_ps(result)); } -template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex b) -{ - return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast(&b))); -} - -template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex b) -{ - return pset1(b); -} - -template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex b) -{ - return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast(&b))); -} - -template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex b) -{ - return pset1(b); -} - } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index cf2f0be17..645aee0cd 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -936,44 +936,6 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons #endif } -template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b) -{ -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_blend_ps(a,pset1(b),1); -#else - return _mm_move_ss(a, _mm_load_ss(&b)); -#endif -} - -template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b) -{ -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_blend_pd(a,pset1(b),1); -#else - return _mm_move_sd(a, _mm_load_sd(&b)); -#endif -} - -template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b) -{ -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_blend_ps(a,pset1(b),(1<<3)); -#else - const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF)); - return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1(b))); -#endif -} - -template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b) -{ -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_blend_pd(a,pset1(b),(1<<1)); -#else - const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF)); - return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1(b))); -#endif -} - // Scalar path for pmadd with FMA to ensure consistency with vectorized path. #ifdef EIGEN_VECTORIZE_FMA template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) { diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index a3b737ad8..4aa33a19f 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -66,17 +66,17 @@ struct linspaced_op_impl { Packet pi = plset(Scalar(i-m_size1)); Packet res = padd(pset1(m_high), pmul(pset1(m_step), pi)); - if(i==0) - res = pinsertfirst(res, m_low); - return res; + if (EIGEN_PREDICT_TRUE(i != 0)) return res; + Packet mask = pcmp_lt(pset1(0), plset(0)); + return pselect(mask, res, pset1(m_low)); } else { Packet pi = plset(Scalar(i)); Packet res = padd(pset1(m_low), pmul(pset1(m_step), pi)); - if(i==m_size1-unpacket_traits::size+1) - res = pinsertlast(res, m_high); - return res; + if(EIGEN_PREDICT_TRUE(i != m_size1-unpacket_traits::size+1)) return res; + Packet mask = pcmp_lt(plset(0), pset1(unpacket_traits::size-1)); + return pselect(mask, res, pset1(m_high)); } } diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 7341d67e7..c7732e6e6 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -288,26 +288,6 @@ template void packetmath() } } - if (PacketTraits::HasInsert || g_vectorize_sse) { - // pinsertfirst - for (int i=0; i(); - ref[0] = s; - internal::pstore(data2, internal::pinsertfirst(internal::pload(data1),s)); - VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::pinsertfirst"); - } - - if (PacketTraits::HasInsert || g_vectorize_sse) { - // pinsertlast - for (int i=0; i(); - ref[PacketSize-1] = s; - internal::pstore(data2, internal::pinsertlast(internal::pload(data1),s)); - VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::pinsertlast"); - } - { for (int i = 0; i < PacketSize; ++i) { // "if" mask diff --git a/test/packetmath_test_shared.h b/test/packetmath_test_shared.h index de1a05508..04f719f96 100644 --- a/test/packetmath_test_shared.h +++ b/test/packetmath_test_shared.h @@ -16,12 +16,6 @@ #endif // using namespace Eigen; -#ifdef EIGEN_VECTORIZE_SSE -const bool g_vectorize_sse = true; -#else -const bool g_vectorize_sse = false; -#endif - bool g_first_pass = true; namespace Eigen {