mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
Created the pblend packet primitive and implemented it using SSE and AVX instructions.
This commit is contained in:
parent
79085e08e9
commit
29aebf96e6
@ -54,6 +54,7 @@ struct default_packet_traits
|
||||
HasMax = 1,
|
||||
HasConj = 1,
|
||||
HasSetLinear = 1,
|
||||
HasBlend = 0,
|
||||
|
||||
HasDiv = 0,
|
||||
HasSqrt = 0,
|
||||
@ -429,6 +430,19 @@ ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
|
||||
// Nothing to do in the scalar case, i.e. a 1x1 matrix.
|
||||
}
|
||||
|
||||
/***************************************************************************
* Selector: a fixed-size array of N boolean flags. pblend() reads these
* flags to decide, coefficient by coefficient, which of two packets each
* output coefficient is taken from.
***************************************************************************/
template <size_t N>
struct Selector
{
  // select[i] == true picks the "then" packet, false picks the "else" packet.
  bool select[N];
};
|
||||
|
||||
// Generic fallback for pblend: only the first selector flag is consulted, and
// the whole thenPacket (flag set) or the whole elsePacket (flag clear) is
// returned unchanged. Vectorized packet types supply their own specializations.
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
  if (ifPacket.select[0]) {
    return thenPacket;
  }
  return elsePacket;
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -59,6 +59,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasLog = 0,
|
||||
HasExp = 0,
|
||||
HasSqrt = 0
|
||||
HasBlend = 1,
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
@ -73,6 +74,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 0
|
||||
HasBlend = 1,
|
||||
};
|
||||
};
|
||||
|
||||
@ -557,6 +559,19 @@ ptranspose(PacketBlock<Packet4d,4>& kernel) {
|
||||
kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
|
||||
}
|
||||
|
||||
// AVX blend of eight floats: lane i of the result comes from thenPacket when
// ifPacket.select[i] is true and from elsePacket otherwise.
template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
  // Convert the boolean flags to floats (true -> 1.0f, false -> 0.0f).
  // _mm256_set_ps lists its arguments from the highest lane down to lane 0.
  const __m256 flags = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6],
                                     ifPacket.select[5], ifPacket.select[4],
                                     ifPacket.select[3], ifPacket.select[2],
                                     ifPacket.select[1], ifPacket.select[0]);
  // Lanes whose flag compares equal to zero are the ones that must take elsePacket.
  const __m256 takeElse = _mm256_cmp_ps(flags, _mm256_setzero_ps(), _CMP_EQ_UQ);
  // blendv returns its second operand wherever the mask lane is set.
  return _mm256_blendv_ps(thenPacket, elsePacket, takeElse);
}
|
||||
// AVX blend of four doubles: lane i of the result comes from thenPacket when
// ifPacket.select[i] is true and from elsePacket otherwise.
template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
  // Convert the boolean flags to doubles (true -> 1.0, false -> 0.0).
  // _mm256_set_pd lists its arguments from the highest lane down to lane 0.
  const __m256d flags = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2],
                                      ifPacket.select[1], ifPacket.select[0]);
  // Lanes whose flag compares equal to zero are the ones that must take elsePacket.
  const __m256d takeElse = _mm256_cmp_pd(flags, _mm256_setzero_pd(), _CMP_EQ_UQ);
  // blendv returns its second operand wherever the mask lane is set.
  return _mm256_blendv_pd(thenPacket, elsePacket, takeElse);
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -44,7 +44,8 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
#endif
|
||||
@ -472,6 +473,11 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
||||
kernel.packet[1].v = tmp;
|
||||
}
|
||||
|
||||
// Blend for Packet2cf: the underlying float register is reinterpreted as two
// 64-bit double lanes so that each of the two selector flags moves one whole
// lane at once, and the work is forwarded to the double-precision pblend.
// NOTE(review): presumably each 64-bit lane holds one complex<float> (real and
// imaginary parts together) -- confirm against the Packet2cf definition.
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
  const __m128d thenAsDouble = _mm_castps_pd(thenPacket.v);
  const __m128d elseAsDouble = _mm_castps_pd(elsePacket.v);
  const __m128d blended = pblend(ifPacket, thenAsDouble, elseAsDouble);
  // Reinterpret the blended bits back as floats and wrap them in a Packet2cf.
  return Packet2cf(_mm_castpd_ps(blended));
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -108,7 +108,8 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasCos = EIGEN_FAST_MATH,
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1
|
||||
HasSqrt = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
@ -123,7 +124,8 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1
|
||||
HasSqrt = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
#endif
|
||||
@ -135,7 +137,9 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
// FIXME check the Has*
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4
|
||||
size=4,
|
||||
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
|
||||
@ -809,6 +813,37 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
|
||||
}
|
||||
|
||||
// SSE blend of four 32-bit integers: lane i of the result comes from thenPacket
// when ifPacket.select[i] is true and from elsePacket otherwise.
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
  // Widen the boolean flags to 32-bit integers (true -> 1, false -> 0).
  // _mm_set_epi32 lists its arguments from the highest lane down to lane 0.
  const __m128i flags = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2],
                                      ifPacket.select[1], ifPacket.select[0]);
  // All bits set in every lane whose flag is zero, i.e. lanes that take elsePacket.
  const __m128i takeElse = _mm_cmpeq_epi32(flags, _mm_setzero_si128());
#ifdef EIGEN_VECTORIZE_SSE4_1
  // Byte-wise blend: bytes with the mask's top bit set come from elsePacket.
  return _mm_blendv_epi8(thenPacket, elsePacket, takeElse);
#else
  // Without SSE4.1, emulate the blend with bitwise mask arithmetic.
  const __m128i fromThen = _mm_andnot_si128(takeElse, thenPacket);
  const __m128i fromElse = _mm_and_si128(takeElse, elsePacket);
  return _mm_or_si128(fromThen, fromElse);
#endif
}
|
||||
// SSE blend of four floats: lane i of the result comes from thenPacket when
// ifPacket.select[i] is true and from elsePacket otherwise.
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
  // Convert the boolean flags to floats (true -> 1.0f, false -> 0.0f).
  // _mm_set_ps lists its arguments from the highest lane down to lane 0.
  const __m128 flags = _mm_set_ps(ifPacket.select[3], ifPacket.select[2],
                                  ifPacket.select[1], ifPacket.select[0]);
  // All bits set in every lane whose flag is zero, i.e. lanes that take elsePacket.
  const __m128 takeElse = _mm_cmpeq_ps(flags, _mm_setzero_ps());
#ifdef EIGEN_VECTORIZE_SSE4_1
  // blendv returns its second operand wherever the mask lane is set.
  return _mm_blendv_ps(thenPacket, elsePacket, takeElse);
#else
  // Without SSE4.1, emulate the blend with bitwise mask arithmetic.
  const __m128 fromThen = _mm_andnot_ps(takeElse, thenPacket);
  const __m128 fromElse = _mm_and_ps(takeElse, elsePacket);
  return _mm_or_ps(fromThen, fromElse);
#endif
}
|
||||
// SSE blend of two doubles: lane i of the result comes from thenPacket when
// ifPacket.select[i] is true and from elsePacket otherwise.
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
  // Convert the boolean flags to doubles (true -> 1.0, false -> 0.0).
  // _mm_set_pd takes the high lane first, then the low lane.
  const __m128d flags = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
  // All bits set in every lane whose flag is zero, i.e. lanes that take elsePacket.
  const __m128d takeElse = _mm_cmpeq_pd(flags, _mm_setzero_pd());
#ifdef EIGEN_VECTORIZE_SSE4_1
  // blendv returns its second operand wherever the mask lane is set.
  return _mm_blendv_pd(thenPacket, elsePacket, takeElse);
#else
  // Without SSE4.1, emulate the blend with bitwise mask arithmetic.
  const __m128d fromThen = _mm_andnot_pd(takeElse, thenPacket);
  const __m128d fromElse = _mm_and_pd(takeElse, elsePacket);
  return _mm_or_pd(fromThen, fromElse);
#endif
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -261,6 +261,22 @@ template<typename Scalar> void packetmath()
|
||||
VERIFY(isApproxAbs(data2[j], data1[i+j*PacketSize], refvalue) && "ptranspose");
|
||||
}
|
||||
}
|
||||
|
||||
if (internal::packet_traits<Scalar>::HasBlend) {
|
||||
Packet thenPacket = internal::pload<Packet>(data1);
|
||||
Packet elsePacket = internal::pload<Packet>(data2);
|
||||
EIGEN_ALIGN_DEFAULT internal::Selector<PacketSize> selector;
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
selector.select[i] = i;
|
||||
}
|
||||
|
||||
Packet blend = internal::pblend(selector, thenPacket, elsePacket);
|
||||
EIGEN_ALIGN_DEFAULT Scalar result[size];
|
||||
internal::pstore(result, blend);
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
VERIFY(isApproxAbs(result[i], (selector.select[i] ? data1[i] : data2[i]), refvalue));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Scalar> void packetmath_real()
|
||||
|
Loading…
Reference in New Issue
Block a user