mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-24 14:45:14 +08:00
Add missing functions for Packet8bf in Altivec architecture.
Also adds new tests for bfloat16 packets and fixes prsqrt in GenericPacketMath.
This commit is contained in:
parent
85428a3440
commit
35d149e34c
@ -612,7 +612,8 @@ Packet psqrt(const Packet& a) { EIGEN_USING_STD_MATH(sqrt); return sqrt(a); }
|
||||
/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet prsqrt(const Packet& a) {
|
||||
return pdiv(pset1<Packet>(1), psqrt(a));
|
||||
typedef typename internal::unpacket_traits<Packet>::type Scalar;
|
||||
return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
|
||||
}
|
||||
|
||||
/** \internal \returns the rounded value of \a a (coeff-wise) */
|
||||
|
@ -646,6 +646,11 @@ template<> EIGEN_DEVICE_FUNC inline Packet8us pgather<unsigned short int, Packet
|
||||
return pgather_size8<Packet8us>(from, stride);
|
||||
}
|
||||
|
||||
/** \internal Strided gather for bfloat16 packets.
  *
  * Delegates to the shared helper pgather_size8 (defined outside this excerpt),
  * passing \a from and \a stride through unchanged.
  */
template<>
EIGEN_DEVICE_FUNC inline Packet8bf pgather<bfloat16, Packet8bf>(const bfloat16* from, Index stride)
{
  const Packet8bf gathered = pgather_size8<Packet8bf>(from, stride);
  return gathered;
}
|
||||
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather_size16(const __UNPACK_TYPE__(Packet)* from, Index stride)
|
||||
{
|
||||
EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[16];
|
||||
@ -724,6 +729,11 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<unsigned short int, Packet8us>
|
||||
pscatter_size8<Packet8us>(to, from, stride);
|
||||
}
|
||||
|
||||
/** \internal Strided scatter for bfloat16 packets.
  *
  * Delegates to the shared helper pscatter_size8 (defined outside this
  * excerpt), passing \a to, \a from and \a stride through unchanged.
  */
template<>
EIGEN_DEVICE_FUNC inline void pscatter<bfloat16, Packet8bf>(bfloat16* to, const Packet8bf& from, Index stride)
{
  pscatter_size8<Packet8bf>(to, from, stride);
}
|
||||
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline void pscatter_size16(__UNPACK_TYPE__(Packet)* to, const Packet& from, Index stride)
|
||||
{
|
||||
EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[16];
|
||||
@ -1285,7 +1295,30 @@ template<> EIGEN_STRONG_INLINE Packet8bf psub<Packet8bf>(const Packet8bf& a, con
|
||||
/** \internal psqrt for Packet8bf, built on vec_sqrt.
  *
  * NOTE(review): BF16_TO_F32_UNARY_OP_WRAPPER is defined outside this excerpt;
  * presumably it applies the given op on float lanes, narrows the result back
  * to bfloat16 and supplies the return statement - confirm against its definition.
  */
template<>
EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf>(const Packet8bf& a)
{
  BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a);
}
|
||||
|
||||
/** \internal prsqrt for Packet8bf, built on prsqrt<Packet4f>.
  *
  * NOTE(review): BF16_TO_F32_UNARY_OP_WRAPPER is defined outside this excerpt;
  * presumably it applies the given op on float lanes and supplies the return
  * statement - confirm against its definition.
  */
template<>
EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf>(const Packet8bf& a)
{
  BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);
}
|
||||
/** \internal pexp for Packet8bf, built on pexp_float.
  *
  * NOTE(review): BF16_TO_F32_UNARY_OP_WRAPPER is defined outside this excerpt;
  * presumably it applies the given op on float lanes and supplies the return
  * statement - confirm against its definition.
  */
template<>
EIGEN_STRONG_INLINE Packet8bf pexp<Packet8bf>(const Packet8bf& a)
{
  BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a);
}
|
||||
/** \internal psin for Packet8bf, built on psin_float.
  *
  * NOTE(review): BF16_TO_F32_UNARY_OP_WRAPPER is defined outside this excerpt;
  * presumably it applies the given op on float lanes and supplies the return
  * statement - confirm against its definition.
  */
template<>
EIGEN_STRONG_INLINE Packet8bf psin<Packet8bf>(const Packet8bf& a)
{
  BF16_TO_F32_UNARY_OP_WRAPPER(psin_float, a);
}
|
||||
/** \internal pcos for Packet8bf, built on pcos_float.
  *
  * NOTE(review): BF16_TO_F32_UNARY_OP_WRAPPER is defined outside this excerpt;
  * presumably it applies the given op on float lanes and supplies the return
  * statement - confirm against its definition.
  */
template<>
EIGEN_STRONG_INLINE Packet8bf pcos<Packet8bf>(const Packet8bf& a)
{
  BF16_TO_F32_UNARY_OP_WRAPPER(pcos_float, a);
}
|
||||
/** \internal plog for Packet8bf, built on plog_float.
  *
  * NOTE(review): BF16_TO_F32_UNARY_OP_WRAPPER is defined outside this excerpt;
  * presumably it applies the given op on float lanes and supplies the return
  * statement - confirm against its definition.
  */
template<>
EIGEN_STRONG_INLINE Packet8bf plog<Packet8bf>(const Packet8bf& a)
{
  BF16_TO_F32_UNARY_OP_WRAPPER(plog_float, a);
}
|
||||
/** \internal pfloor for Packet8bf, built on pfloor<Packet4f>.
  *
  * NOTE(review): BF16_TO_F32_UNARY_OP_WRAPPER is defined outside this excerpt;
  * presumably it applies the given op on float lanes and supplies the return
  * statement - confirm against its definition.
  */
template<>
EIGEN_STRONG_INLINE Packet8bf pfloor<Packet8bf>(const Packet8bf& a)
{
  BF16_TO_F32_UNARY_OP_WRAPPER(pfloor<Packet4f>, a);
}
|
||||
/** \internal pceil for Packet8bf, built on pceil<Packet4f>.
  *
  * NOTE(review): BF16_TO_F32_UNARY_OP_WRAPPER is defined outside this excerpt;
  * presumably it applies the given op on float lanes and supplies the return
  * statement - confirm against its definition.
  */
template<>
EIGEN_STRONG_INLINE Packet8bf pceil<Packet8bf>(const Packet8bf& a)
{
  BF16_TO_F32_UNARY_OP_WRAPPER(pceil<Packet4f>, a);
}
|
||||
/** \internal pround for Packet8bf, built on pround<Packet4f>.
  *
  * NOTE(review): BF16_TO_F32_UNARY_OP_WRAPPER is defined outside this excerpt;
  * presumably it applies the given op on float lanes and supplies the return
  * statement - confirm against its definition.
  */
template<>
EIGEN_STRONG_INLINE Packet8bf pround<Packet8bf>(const Packet8bf& a)
{
  BF16_TO_F32_UNARY_OP_WRAPPER(pround<Packet4f>, a);
}
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf pmadd(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) {
|
||||
Packet4f a_even = Bf16ToF32Even(a);
|
||||
Packet4f a_odd = Bf16ToF32Odd(a);
|
||||
@ -1325,6 +1358,12 @@ template<> EIGEN_STRONG_INLINE Packet8bf ploaddup<Packet8bf>(const bfloat16*
|
||||
return ploaddup<Packet8us>(reinterpret_cast<const unsigned short int*>(from));
|
||||
}
|
||||
|
||||
/** \internal \returns the packet {a+0, a+1, ..., a+7}
  *
  * The 0..7 ramp is staged in a local array and read back with pload. pload is
  * the aligned load, so the array is explicitly 16-byte aligned; a plain
  * bfloat16[8] is not guaranteed to be.
  */
template<> EIGEN_STRONG_INLINE Packet8bf plset<Packet8bf>(const bfloat16& a) {
  EIGEN_ALIGN16 bfloat16 ramp[8] = { bfloat16(0), bfloat16(1), bfloat16(2), bfloat16(3),
                                     bfloat16(4), bfloat16(5), bfloat16(6), bfloat16(7) };
  return padd<Packet8bf>(pset1<Packet8bf>(a), pload<Packet8bf>(ramp));
}
|
||||
|
||||
/** \internal pfrexp for Packet4f.
  *
  * Forwards \a a and the \a exponent reference to pfrexp_float (defined
  * outside this excerpt) and returns its result.
  */
template<>
EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent)
{
  const Packet4f result = pfrexp_float(a, exponent);
  return result;
}
|
||||
|
@ -707,6 +707,62 @@ void packetmath_real() {
|
||||
}
|
||||
}
|
||||
|
||||
// Conditional coefficient-wise check with a cast reference.
// When COND holds, the first PacketSize entries of data1 are cast to REFTYPE,
// passed to REFOP and converted to SCALAR to fill ref. The packet op POP is
// then run on data1 through test::packet_helper, stored into data2, and
// compared with ref.
// Relies on Packet, PacketSize, data1, data2 and ref being in the caller's scope.
#define CAST_CHECK_CWISE1_IF(COND, REFOP, POP, SCALAR, REFTYPE) if(COND) { \
  test::packet_helper<COND,Packet> helper; \
  for (int k=0; k<PacketSize; ++k) { \
    ref[k] = SCALAR(REFOP(static_cast<REFTYPE>(data1[k]))); \
  } \
  helper.store(data2, POP(helper.load(data1))); \
  VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \
}
|
||||
|
||||
template <>
|
||||
void packetmath_real<bfloat16, typename internal::packet_traits<bfloat16>::type>(){
|
||||
typedef internal::packet_traits<bfloat16> PacketTraits;
|
||||
typedef internal::packet_traits<bfloat16>::type Packet;
|
||||
|
||||
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
||||
const int size = PacketSize * 4;
|
||||
EIGEN_ALIGN_MAX bfloat16 data1[PacketSize * 4];
|
||||
EIGEN_ALIGN_MAX bfloat16 data2[PacketSize * 4];
|
||||
EIGEN_ALIGN_MAX bfloat16 ref[PacketSize * 4];
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = bfloat16(internal::random<float>(0, 1) * std::pow(float(10), internal::random<float>(-6, 6)));
|
||||
data2[i] = bfloat16(internal::random<float>(0, 1) * std::pow(float(10), internal::random<float>(-6, 6)));
|
||||
data1[i] = bfloat16(0);
|
||||
}
|
||||
|
||||
if (internal::random<float>(0, 1) < 0.1f) data1[internal::random<int>(0, PacketSize)] = bfloat16(0);
|
||||
|
||||
CAST_CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog, bfloat16, float);
|
||||
CAST_CHECK_CWISE1_IF(PacketTraits::HasRsqrt, float(1) / std::sqrt, internal::prsqrt, bfloat16, float);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = bfloat16(internal::random<float>(-1, 1) * std::pow(float(10), internal::random<float>(-3, 3)));
|
||||
data2[i] = bfloat16(internal::random<float>(-1, 1) * std::pow(float(10), internal::random<float>(-3, 3)));
|
||||
}
|
||||
CAST_CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin, bfloat16, float);
|
||||
CAST_CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos, bfloat16, float);
|
||||
CAST_CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan, bfloat16, float);
|
||||
|
||||
CAST_CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround, bfloat16, float);
|
||||
CAST_CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil, bfloat16, float);
|
||||
CAST_CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor, bfloat16, float);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = bfloat16(-1.5 + i);
|
||||
data2[i] = bfloat16(-1.5 + i);
|
||||
}
|
||||
CAST_CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround, bfloat16, float);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = bfloat16(internal::random<float>(-87, 88));
|
||||
data2[i] = bfloat16(internal::random<float>(-87, 88));
|
||||
}
|
||||
CAST_CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp, bfloat16, float);
|
||||
|
||||
}
|
||||
|
||||
template <typename Scalar, typename Packet>
|
||||
void packetmath_notcomplex() {
|
||||
typedef internal::packet_traits<Scalar> PacketTraits;
|
||||
@ -761,6 +817,47 @@ void packetmath_notcomplex() {
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void packetmath_notcomplex<bfloat16, typename internal::packet_traits<bfloat16>::type>(){
|
||||
typedef bfloat16 Scalar;
|
||||
typedef internal::packet_traits<bfloat16>::type Packet;
|
||||
typedef internal::packet_traits<Scalar> PacketTraits;
|
||||
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
||||
|
||||
EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4];
|
||||
EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4];
|
||||
EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4];
|
||||
Array<Scalar, Dynamic, 1>::Map(data1, PacketSize * 4).setRandom();
|
||||
|
||||
ref[0] = data1[0];
|
||||
for (int i = 0; i < PacketSize; ++i) ref[0] = (std::min)(ref[0], data1[i]);
|
||||
VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload<Packet>(data1))) && "internal::predux_min");
|
||||
|
||||
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin);
|
||||
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax);
|
||||
|
||||
CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin);
|
||||
CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax);
|
||||
CHECK_CWISE1(numext::abs, internal::pabs);
|
||||
CHECK_CWISE2_IF(PacketTraits::HasAbsDiff, REF_ABS_DIFF, internal::pabsdiff);
|
||||
|
||||
ref[0] = data1[0];
|
||||
for (int i = 0; i < PacketSize; ++i) ref[0] = (std::max)(ref[0], data1[i]);
|
||||
VERIFY(internal::isApprox(ref[0], internal::predux_max(internal::pload<Packet>(data1))) && "internal::predux_max");
|
||||
|
||||
{
|
||||
unsigned char* data1_bits = reinterpret_cast<unsigned char*>(data1);
|
||||
// predux_any
|
||||
for (unsigned int i = 0; i < PacketSize * sizeof(Scalar); ++i) data1_bits[i] = 0x0;
|
||||
VERIFY((!internal::predux_any(internal::pload<Packet>(data1))) && "internal::predux_any(0000)");
|
||||
for (int k = 0; k < PacketSize; ++k) {
|
||||
for (unsigned int i = 0; i < sizeof(Scalar); ++i) data1_bits[k * sizeof(Scalar) + i] = 0xff;
|
||||
VERIFY(internal::predux_any(internal::pload<Packet>(data1)) && "internal::predux_any(0101)");
|
||||
for (unsigned int i = 0; i < sizeof(Scalar); ++i) data1_bits[k * sizeof(Scalar) + i] = 0x00;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Scalar, typename Packet, bool ConjLhs, bool ConjRhs>
|
||||
void test_conj_helper(Scalar* data1, Scalar* data2, Scalar* ref, Scalar* pval) {
|
||||
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
||||
@ -819,7 +916,7 @@ void packetmath_scatter_gather() {
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
||||
EIGEN_ALIGN_MAX Scalar data1[PacketSize];
|
||||
RealScalar refvalue = 0;
|
||||
RealScalar refvalue = RealScalar(0);
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
data1[i] = internal::random<Scalar>() / RealScalar(PacketSize);
|
||||
}
|
||||
@ -900,7 +997,7 @@ EIGEN_DECLARE_TEST(packetmath) {
|
||||
CALL_SUBTEST_12(test::runner<std::complex<double> >::run());
|
||||
CALL_SUBTEST_13((packetmath<half, internal::packet_traits<half>::type>()));
|
||||
CALL_SUBTEST_14((packetmath<bool, internal::packet_traits<bool>::type>()));
|
||||
CALL_SUBTEST_15((packetmath<bfloat16, internal::packet_traits<bfloat16>::type>()));
|
||||
CALL_SUBTEST_15(test::runner<bfloat16>::run());
|
||||
g_first_pass = false;
|
||||
}
|
||||
}
|
||||
|
@ -208,7 +208,6 @@ struct runner<Scalar,PacketType,true,false>
|
||||
{
|
||||
static void run() {
|
||||
runall<Scalar,PacketType>::run();
|
||||
runall<Scalar,Scalar>::run();
|
||||
}
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user