diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h index 63ceace1e..6f81fe382 100644 --- a/Eigen/src/Core/arch/Default/BFloat16.h +++ b/Eigen/src/Core/arch/Default/BFloat16.h @@ -69,7 +69,7 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base { template explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const T& val) : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne::value>(static_cast(val))) {} - + explicit EIGEN_DEVICE_FUNC bfloat16(float f) : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne(f)) {} @@ -88,7 +88,7 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base { // +0.0 and -0.0 become false, everything else becomes true. return (value & 0x7fff) != 0; } -#endif +#endif }; } // namespace Eigen @@ -272,10 +272,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const return output; } -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value) { return __bfloat16_raw(value); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_bfloat16_as_uint16(const __bfloat16_raw& bf) { + return bf.value; +} + // float_to_bfloat16_rtne template specialization that does not make any // assumption about the value of its function argument (ff). template <> @@ -454,7 +458,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff) { #if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16)) @@ -691,7 +695,17 @@ bool (isfinite)(const Eigen::bfloat16& h) { return (bfloat16_impl::isfinite)(h); } -} // namespace numext +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast(const uint16_t& src) { + return Eigen::bfloat16(Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src)); +} + +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast(const Eigen::bfloat16& src) { + return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src); +} + +} // namespace numext } // namespace Eigen #endif // EIGEN_BFLOAT16_H diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h index 0bc1e9d19..fda38bcb0 100644 --- a/Eigen/src/Core/arch/Default/Half.h +++ b/Eigen/src/Core/arch/Default/Half.h @@ -494,6 +494,19 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_h #endif } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_half_as_uint16(const __half_raw& h) { + // HIP/CUDA/Default have a member 'x' of type uint16_t. + // For ARM64 native half, the member 'x' is of type __fp16, so we need to bit-cast. + // For SYCL, cl::sycl::half is _Float16, so cast directly. +#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) + return numext::bit_cast(h.x); +#elif defined(SYCL_DEVICE_ONLY) + return numext::bit_cast(h); +#else + return h.x; +#endif +} + union float32_bits { unsigned int u; float f; @@ -812,10 +825,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) } #endif -#if defined(EIGEN_GPU_COMPILE_PHASE) namespace Eigen { namespace numext { +#if defined(EIGEN_GPU_COMPILE_PHASE) + template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isnan)(const Eigen::half& h) { return (half_impl::isnan)(h); @@ -830,8 +844,20 @@ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isfinite)(const Eigen::half& h) { return (half_impl::isfinite)(h); } -} // namespace numext -} // namespace Eigen + #endif +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bit_cast(const uint16_t& src) { + return Eigen::half(Eigen::half_impl::raw_uint16_to_half(src)); +} + +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast(const Eigen::half& src) { + return Eigen::half_impl::raw_half_as_uint16(src); +} + +} // namespace numext +} // namespace Eigen + #endif // EIGEN_HALF_H diff --git a/test/bfloat16_float.cpp b/test/bfloat16_float.cpp index 09df2b2f2..fc648dfec 100644 --- a/test/bfloat16_float.cpp +++ b/test/bfloat16_float.cpp @@ -13,6 +13,9 @@ #include +#define VERIFY_BFLOAT16_BITS_EQUAL(h, bits) \ + VERIFY_IS_EQUAL((numext::bit_cast(h)), (static_cast(bits))) + // Make sure it's possible to forward declare Eigen::bfloat16 namespace Eigen { struct bfloat16; @@ -58,31 +61,45 @@ void test_conversion() { using Eigen::bfloat16_impl::__bfloat16_raw; + // Round-trip casts + VERIFY_IS_EQUAL( + numext::bit_cast(numext::bit_cast(bfloat16(1.0f))), + bfloat16(1.0f)); + VERIFY_IS_EQUAL( + numext::bit_cast(numext::bit_cast(bfloat16(0.5f))), + bfloat16(0.5f)); + VERIFY_IS_EQUAL( + numext::bit_cast(numext::bit_cast(bfloat16(-0.33333f))), + bfloat16(-0.33333f)); + VERIFY_IS_EQUAL( + numext::bit_cast(numext::bit_cast(bfloat16(0.0f))), + bfloat16(0.0f)); + // Conversion from float. - VERIFY_IS_EQUAL(bfloat16(1.0f).value, 0x3f80); - VERIFY_IS_EQUAL(bfloat16(0.5f).value, 0x3f00); - VERIFY_IS_EQUAL(bfloat16(0.33333f).value, 0x3eab); - VERIFY_IS_EQUAL(bfloat16(3.38e38f).value, 0x7f7e); - VERIFY_IS_EQUAL(bfloat16(3.40e38f).value, 0x7f80); // Becomes infinity. + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(1.0f), 0x3f80); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f), 0x3f00); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.33333f), 0x3eab); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3.38e38f), 0x7f7e); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3.40e38f), 0x7f80); // Becomes infinity. // Verify round-to-nearest-even behavior. float val1 = static_cast(bfloat16(__bfloat16_raw(0x3c00))); float val2 = static_cast(bfloat16(__bfloat16_raw(0x3c01))); float val3 = static_cast(bfloat16(__bfloat16_raw(0x3c02))); - VERIFY_IS_EQUAL(bfloat16(0.5f * (val1 + val2)).value, 0x3c00); - VERIFY_IS_EQUAL(bfloat16(0.5f * (val2 + val3)).value, 0x3c02); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f * (val1 + val2)), 0x3c00); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f * (val2 + val3)), 0x3c02); // Conversion from int. - VERIFY_IS_EQUAL(bfloat16(-1).value, 0xbf80); - VERIFY_IS_EQUAL(bfloat16(0).value, 0x0000); - VERIFY_IS_EQUAL(bfloat16(1).value, 0x3f80); - VERIFY_IS_EQUAL(bfloat16(2).value, 0x4000); - VERIFY_IS_EQUAL(bfloat16(3).value, 0x4040); - VERIFY_IS_EQUAL(bfloat16(12).value, 0x4140); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(-1), 0xbf80); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0), 0x0000); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(1), 0x3f80); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(2), 0x4000); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3), 0x4040); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(12), 0x4140); // Conversion from bool. - VERIFY_IS_EQUAL(bfloat16(false).value, 0x0000); - VERIFY_IS_EQUAL(bfloat16(true).value, 0x3f80); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(false), 0x0000); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(true), 0x3f80); // Conversion to bool VERIFY_IS_EQUAL(static_cast(bfloat16(3)), true); @@ -102,8 +119,8 @@ void test_conversion() VERIFY_IS_EQUAL(bfloat16(0.0f), bfloat16(0.0f)); VERIFY_IS_EQUAL(bfloat16(-0.0f), bfloat16(0.0f)); VERIFY_IS_EQUAL(bfloat16(-0.0f), bfloat16(-0.0f)); - VERIFY_IS_EQUAL(bfloat16(0.0f).value, 0x0000); - VERIFY_IS_EQUAL(bfloat16(-0.0f).value, 0x8000); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.0f), 0x0000); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(-0.0f), 0x8000); // Flush denormals to zero for (float denorm = -std::numeric_limits::denorm_min(); @@ -117,16 +134,16 @@ void test_conversion() VERIFY_IS_EQUAL(bfloat16(denorm), false); if (std::signbit(denorm)) { - VERIFY_IS_EQUAL(bf_trunc.value, 0x8000); + VERIFY_BFLOAT16_BITS_EQUAL(bf_trunc, 0x8000); } else { - VERIFY_IS_EQUAL(bf_trunc.value, 0x0000); + VERIFY_BFLOAT16_BITS_EQUAL(bf_trunc, 0x0000); } bfloat16 bf_round = Eigen::bfloat16_impl::float_to_bfloat16_rtne(denorm); VERIFY_IS_EQUAL(static_cast(bf_round), 0.0f); if (std::signbit(denorm)) { - VERIFY_IS_EQUAL(bf_round.value, 0x8000); + VERIFY_BFLOAT16_BITS_EQUAL(bf_round, 0x8000); } else { - VERIFY_IS_EQUAL(bf_round.value, 0x0000); + VERIFY_BFLOAT16_BITS_EQUAL(bf_round, 0x0000); } } @@ -231,33 +248,35 @@ void test_conversion() VERIFY((numext::isinf)(bfloat16(__bfloat16_raw(0x7f80)))); VERIFY((numext::isnan)(bfloat16(__bfloat16_raw(0x7fc0)))); - VERIFY_IS_EQUAL(bfloat16(BinaryToFloat(0x0, 0xff, 0x40, 0x0)).value, 0x7fc0); - VERIFY_IS_EQUAL(bfloat16(BinaryToFloat(0x1, 0xff, 0x40, 0x0)).value, 0xffc0); - VERIFY_IS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16( - BinaryToFloat(0x0, 0xff, 0x40, 0x0)) - .value, - 0x7fc0); - VERIFY_IS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16( - BinaryToFloat(0x1, 0xff, 0x40, 0x0)) - .value, - 0xffc0); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(BinaryToFloat(0x0, 0xff, 0x40, 0x0)), 0x7fc0); + VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(BinaryToFloat(0x1, 0xff, 0x40, 0x0)), 0xffc0); + VERIFY_BFLOAT16_BITS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16( + BinaryToFloat(0x0, 0xff, 0x40, 0x0)), + 0x7fc0); + VERIFY_BFLOAT16_BITS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16( + BinaryToFloat(0x1, 0xff, 0x40, 0x0)), + 0xffc0); } void test_numtraits() { - std::cout << "epsilon = " << NumTraits::epsilon() << " (0x" << std::hex << NumTraits::epsilon().value << ")" << std::endl; - std::cout << "highest = " << NumTraits::highest() << " (0x" << std::hex << NumTraits::highest().value << ")" << std::endl; - std::cout << "lowest = " << NumTraits::lowest() << " (0x" << std::hex << NumTraits::lowest().value << ")" << std::endl; - std::cout << "min = " << (std::numeric_limits::min)() << " (0x" << std::hex << (std::numeric_limits::min)().value << ")" << std::endl; - std::cout << "denorm min = " << (std::numeric_limits::denorm_min)() << " (0x" << std::hex << (std::numeric_limits::denorm_min)().value << ")" << std::endl; - std::cout << "infinity = " << NumTraits::infinity() << " (0x" << std::hex << NumTraits::infinity().value << ")" << std::endl; - std::cout << "quiet nan = " << NumTraits::quiet_NaN() << " (0x" << std::hex << NumTraits::quiet_NaN().value << ")" << std::endl; - std::cout << "signaling nan = " << std::numeric_limits::signaling_NaN() << " (0x" << std::hex << std::numeric_limits::signaling_NaN().value << ")" << std::endl; + std::cout << "epsilon = " << NumTraits::epsilon() << " (0x" << std::hex << numext::bit_cast(NumTraits::epsilon()) << ")" << std::endl; + std::cout << "highest = " << NumTraits::highest() << " (0x" << std::hex << numext::bit_cast(NumTraits::highest()) << ")" << std::endl; + std::cout << "lowest = " << NumTraits::lowest() << " (0x" << std::hex << numext::bit_cast(NumTraits::lowest()) << ")" << std::endl; + std::cout << "min = " << (std::numeric_limits::min)() << " (0x" << std::hex << numext::bit_cast((std::numeric_limits::min)()) << ")" << std::endl; + std::cout << "denorm min = " << (std::numeric_limits::denorm_min)() << " (0x" << std::hex << numext::bit_cast((std::numeric_limits::denorm_min)()) << ")" << std::endl; + std::cout << "infinity = " << NumTraits::infinity() << " (0x" << std::hex << numext::bit_cast(NumTraits::infinity()) << ")" << std::endl; + std::cout << "quiet nan = " << NumTraits::quiet_NaN() << " (0x" << std::hex << numext::bit_cast(NumTraits::quiet_NaN()) << ")" << std::endl; + std::cout << "signaling nan = " << std::numeric_limits::signaling_NaN() << " (0x" << std::hex << numext::bit_cast(std::numeric_limits::signaling_NaN()) << ")" << std::endl; VERIFY(NumTraits::IsSigned); - VERIFY_IS_EQUAL( std::numeric_limits::infinity().value, bfloat16(std::numeric_limits::infinity()).value ); - VERIFY_IS_EQUAL( std::numeric_limits::quiet_NaN().value, bfloat16(std::numeric_limits::quiet_NaN()).value ); + VERIFY_IS_EQUAL( + numext::bit_cast(std::numeric_limits::infinity()), + numext::bit_cast(bfloat16(std::numeric_limits::infinity())) ); + VERIFY_IS_EQUAL( + numext::bit_cast(std::numeric_limits::quiet_NaN()), + numext::bit_cast(bfloat16(std::numeric_limits::quiet_NaN())) ); VERIFY( (std::numeric_limits::min)() > bfloat16(0.f) ); VERIFY( (std::numeric_limits::denorm_min)() > bfloat16(0.f) ); VERIFY_IS_EQUAL( (std::numeric_limits::denorm_min)()/bfloat16(2), bfloat16(0.f) ); diff --git a/test/half_float.cpp b/test/half_float.cpp index b301b371d..cf6df547a 100644 --- a/test/half_float.cpp +++ b/test/half_float.cpp @@ -11,6 +11,9 @@ #include +#define VERIFY_HALF_BITS_EQUAL(h, bits) \ + VERIFY_IS_EQUAL((numext::bit_cast(h)), (static_cast(bits))) + // Make sure it's possible to forward declare Eigen::half namespace Eigen { struct half; @@ -22,75 +25,51 @@ void test_conversion() { using Eigen::half_impl::__half_raw; - // We don't use a uint16_t raw member x if the platform has native Arm __fp16 - // support -#if !defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) + // Round-trip bit-cast with uint16. + VERIFY_IS_EQUAL( + numext::bit_cast(numext::bit_cast(half(1.0f))), + half(1.0f)); + VERIFY_IS_EQUAL( + numext::bit_cast(numext::bit_cast(half(0.5f))), + half(0.5f)); + VERIFY_IS_EQUAL( + numext::bit_cast(numext::bit_cast(half(-0.33333f))), + half(-0.33333f)); + VERIFY_IS_EQUAL( + numext::bit_cast(numext::bit_cast(half(0.0f))), + half(0.0f)); + // Conversion from float. - VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00); - VERIFY_IS_EQUAL(half(0.5f).x, 0x3800); - VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555); - VERIFY_IS_EQUAL(half(0.0f).x, 0x0000); - VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000); - VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff); - VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00); // Becomes infinity. + VERIFY_HALF_BITS_EQUAL(half(1.0f), 0x3c00); + VERIFY_HALF_BITS_EQUAL(half(0.5f), 0x3800); + VERIFY_HALF_BITS_EQUAL(half(0.33333f), 0x3555); + VERIFY_HALF_BITS_EQUAL(half(0.0f), 0x0000); + VERIFY_HALF_BITS_EQUAL(half(-0.0f), 0x8000); + VERIFY_HALF_BITS_EQUAL(half(65504.0f), 0x7bff); + VERIFY_HALF_BITS_EQUAL(half(65536.0f), 0x7c00); // Becomes infinity. // Denormals. - VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001); - VERIFY_IS_EQUAL(half(5.96046e-08f).x, 0x0001); - VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002); + VERIFY_HALF_BITS_EQUAL(half(-5.96046e-08f), 0x8001); + VERIFY_HALF_BITS_EQUAL(half(5.96046e-08f), 0x0001); + VERIFY_HALF_BITS_EQUAL(half(1.19209e-07f), 0x0002); // Verify round-to-nearest-even behavior. float val1 = float(half(__half_raw(0x3c00))); float val2 = float(half(__half_raw(0x3c01))); float val3 = float(half(__half_raw(0x3c02))); - VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, 0x3c00); - VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, 0x3c02); + VERIFY_HALF_BITS_EQUAL(half(0.5f * (val1 + val2)), 0x3c00); + VERIFY_HALF_BITS_EQUAL(half(0.5f * (val2 + val3)), 0x3c02); // Conversion from int. - VERIFY_IS_EQUAL(half(-1).x, 0xbc00); - VERIFY_IS_EQUAL(half(0).x, 0x0000); - VERIFY_IS_EQUAL(half(1).x, 0x3c00); - VERIFY_IS_EQUAL(half(2).x, 0x4000); - VERIFY_IS_EQUAL(half(3).x, 0x4200); + VERIFY_HALF_BITS_EQUAL(half(-1), 0xbc00); + VERIFY_HALF_BITS_EQUAL(half(0), 0x0000); + VERIFY_HALF_BITS_EQUAL(half(1), 0x3c00); + VERIFY_HALF_BITS_EQUAL(half(2), 0x4000); + VERIFY_HALF_BITS_EQUAL(half(3), 0x4200); // Conversion from bool. - VERIFY_IS_EQUAL(half(false).x, 0x0000); - VERIFY_IS_EQUAL(half(true).x, 0x3c00); -#endif - -#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) - // Conversion from float. - VERIFY_IS_EQUAL(half(1.0f).x, __fp16(1.0f)); - VERIFY_IS_EQUAL(half(0.5f).x, __fp16(0.5f)); - VERIFY_IS_EQUAL(half(0.33333f).x, __fp16(0.33333f)); - VERIFY_IS_EQUAL(half(0.0f).x, __fp16(0.0f)); - VERIFY_IS_EQUAL(half(-0.0f).x, __fp16(-0.0f)); - VERIFY_IS_EQUAL(half(65504.0f).x, __fp16(65504.0f)); - VERIFY_IS_EQUAL(half(65536.0f).x, __fp16(65536.0f)); // Becomes infinity. - - // Denormals. - VERIFY_IS_EQUAL(half(-5.96046e-08f).x, __fp16(-5.96046e-08f)); - VERIFY_IS_EQUAL(half(5.96046e-08f).x, __fp16(5.96046e-08f)); - VERIFY_IS_EQUAL(half(1.19209e-07f).x, __fp16(1.19209e-07f)); - - // Verify round-to-nearest-even behavior. - float val1 = float(half(__half_raw(0x3c00))); - float val2 = float(half(__half_raw(0x3c01))); - float val3 = float(half(__half_raw(0x3c02))); - VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, __fp16(0.5f * (val1 + val2))); - VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, __fp16(0.5f * (val2 + val3))); - - // Conversion from int. - VERIFY_IS_EQUAL(half(-1).x, __fp16(-1)); - VERIFY_IS_EQUAL(half(0).x, __fp16(0)); - VERIFY_IS_EQUAL(half(1).x, __fp16(1)); - VERIFY_IS_EQUAL(half(2).x, __fp16(2)); - VERIFY_IS_EQUAL(half(3).x, __fp16(3)); - - // Conversion from bool. - VERIFY_IS_EQUAL(half(false).x, __fp16(false)); - VERIFY_IS_EQUAL(half(true).x, __fp16(true)); -#endif + VERIFY_HALF_BITS_EQUAL(half(false), 0x0000); + VERIFY_HALF_BITS_EQUAL(half(true), 0x3c00); // Conversion to float. VERIFY_IS_EQUAL(float(half(__half_raw(0x0000))), 0.0f); @@ -143,24 +122,27 @@ void test_conversion() void test_numtraits() { - std::cout << "epsilon = " << NumTraits::epsilon() << " (0x" << std::hex << NumTraits::epsilon().x << ")" << std::endl; - std::cout << "highest = " << NumTraits::highest() << " (0x" << std::hex << NumTraits::highest().x << ")" << std::endl; - std::cout << "lowest = " << NumTraits::lowest() << " (0x" << std::hex << NumTraits::lowest().x << ")" << std::endl; - std::cout << "min = " << (std::numeric_limits::min)() << " (0x" << std::hex << half((std::numeric_limits::min)()).x << ")" << std::endl; - std::cout << "denorm min = " << (std::numeric_limits::denorm_min)() << " (0x" << std::hex << half((std::numeric_limits::denorm_min)()).x << ")" << std::endl; - std::cout << "infinity = " << NumTraits::infinity() << " (0x" << std::hex << NumTraits::infinity().x << ")" << std::endl; - std::cout << "quiet nan = " << NumTraits::quiet_NaN() << " (0x" << std::hex << NumTraits::quiet_NaN().x << ")" << std::endl; - std::cout << "signaling nan = " << std::numeric_limits::signaling_NaN() << " (0x" << std::hex << std::numeric_limits::signaling_NaN().x << ")" << std::endl; + std::cout << "epsilon = " << NumTraits::epsilon() << " (0x" << std::hex << numext::bit_cast(NumTraits::epsilon()) << ")" << std::endl; + std::cout << "highest = " << NumTraits::highest() << " (0x" << std::hex << numext::bit_cast(NumTraits::highest()) << ")" << std::endl; + std::cout << "lowest = " << NumTraits::lowest() << " (0x" << std::hex << numext::bit_cast(NumTraits::lowest()) << ")" << std::endl; + std::cout << "min = " << (std::numeric_limits::min)() << " (0x" << std::hex << numext::bit_cast(half((std::numeric_limits::min)())) << ")" << std::endl; + std::cout << "denorm min = " << (std::numeric_limits::denorm_min)() << " (0x" << std::hex << numext::bit_cast(half((std::numeric_limits::denorm_min)())) << ")" << std::endl; + std::cout << "infinity = " << NumTraits::infinity() << " (0x" << std::hex << numext::bit_cast(NumTraits::infinity()) << ")" << std::endl; + std::cout << "quiet nan = " << NumTraits::quiet_NaN() << " (0x" << std::hex << numext::bit_cast(NumTraits::quiet_NaN()) << ")" << std::endl; + std::cout << "signaling nan = " << std::numeric_limits::signaling_NaN() << " (0x" << std::hex << numext::bit_cast(std::numeric_limits::signaling_NaN()) << ")" << std::endl; VERIFY(NumTraits::IsSigned); - VERIFY_IS_EQUAL( std::numeric_limits::infinity().x, half(std::numeric_limits::infinity()).x ); + VERIFY_IS_EQUAL( + numext::bit_cast(std::numeric_limits::infinity()), + numext::bit_cast(half(std::numeric_limits::infinity())) ); + VERIFY_IS_EQUAL( + numext::bit_cast(std::numeric_limits::quiet_NaN()), + numext::bit_cast(half(std::numeric_limits::quiet_NaN())) ); + VERIFY_IS_EQUAL( + numext::bit_cast(std::numeric_limits::signaling_NaN()), + numext::bit_cast(half(std::numeric_limits::signaling_NaN())) ); -// If we have a native fp16 types this becomes a nan == nan comparision so we have to disable it -#if !defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) - VERIFY_IS_EQUAL( std::numeric_limits::quiet_NaN().x, half(std::numeric_limits::quiet_NaN()).x ); - VERIFY_IS_EQUAL( std::numeric_limits::signaling_NaN().x, half(std::numeric_limits::signaling_NaN()).x ); -#endif VERIFY( (std::numeric_limits::min)() > half(0.f) ); VERIFY( (std::numeric_limits::denorm_min)() > half(0.f) ); VERIFY( (std::numeric_limits::min)()/half(2) > half(0.f) ); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h index ea286fee1..13450e1a7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h @@ -91,24 +91,21 @@ T RandomToTypeUniform(uint64_t* state, uint64_t stream) { template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half RandomToTypeUniform(uint64_t* state, uint64_t stream) { - Eigen::half result; - // Generate 10 random bits for the mantissa + // Generate 10 random bits for the mantissa, merge with exponent. unsigned rnd = PCG_XSH_RS_generator(state, stream); - result.x = static_cast(rnd & 0x3ffu); - // Set the exponent - result.x |= (static_cast(15) << 10); + const uint16_t half_bits = static_cast(rnd & 0x3ffu) | (static_cast(15) << 10); + Eigen::half result = Eigen::numext::bit_cast(half_bits); // Return the final result return result - Eigen::half(1.0f); } template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::bfloat16 RandomToTypeUniform(uint64_t* state, uint64_t stream) { - Eigen::bfloat16 result; - // Generate 7 random bits for the mantissa + + // Generate 7 random bits for the mantissa, merge with exponent. unsigned rnd = PCG_XSH_RS_generator(state, stream); - result.value = static_cast(rnd & 0x7fu); - // Set the exponent - result.value |= (static_cast(127) << 7); + const uint16_t half_bits = static_cast(rnd & 0x7fu) | (static_cast(127) << 7); + Eigen::bfloat16 result = Eigen::numext::bit_cast(half_bits); // Return the final result return result - Eigen::bfloat16(1.0f); } @@ -169,19 +166,19 @@ template class UniformRandomGenerator { uint64_t seed = 0) { m_state = PCG_XSH_RS_state(seed); #ifdef EIGEN_USE_SYCL - // In SYCL it is not possible to build PCG_XSH_RS_state in one step. + // In SYCL it is not possible to build PCG_XSH_RS_state in one step. // Therefor, we need two step to initializate the m_state. // IN SYCL, the constructor of the functor is s called on the CPU - // and we get the clock seed here from the CPU. However, This seed is + // and we get the clock seed here from the CPU. However, This seed is //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function. // and only available on the Operator() function (which is called on the GPU). - // Thus for CUDA (((CLOCK + global_thread_id)* 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread - // but for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread and each thread adds - // the (global_thread_id* 6364136223846793005ULL) for itself only once, in order to complete the construction - // similar to CUDA Therefore, the thread Id injection is not available at this stage. - //However when the operator() is called the thread ID will be avilable. So inside the opeator, - // we add the thrreadID, BlockId,... (which is equivalent of i) - //to the seed and construct the unique m_state per thead similar to cuda. + // Thus for CUDA (((CLOCK + global_thread_id)* 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread + // but for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread and each thread adds + // the (global_thread_id* 6364136223846793005ULL) for itself only once, in order to complete the construction + // similar to CUDA Therefore, the thread Id injection is not available at this stage. + //However when the operator() is called the thread ID will be avilable. So inside the opeator, + // we add the thrreadID, BlockId,... (which is equivalent of i) + //to the seed and construct the unique m_state per thead similar to cuda. m_exec_once =false; #endif } @@ -282,16 +279,16 @@ template class NormalRandomGenerator { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { m_state = PCG_XSH_RS_state(seed); #ifdef EIGEN_USE_SYCL - // In SYCL it is not possible to build PCG_XSH_RS_state in one step. + // In SYCL it is not possible to build PCG_XSH_RS_state in one step. // Therefor, we need two steps to initializate the m_state. // IN SYCL, the constructor of the functor is s called on the CPU - // and we get the clock seed here from the CPU. However, This seed is + // and we get the clock seed here from the CPU. However, This seed is //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function. // and only available on the Operator() function (which is called on the GPU). - // Therefore, the thread Id injection is not available at this stage. However when the operator() - //is called the thread ID will be avilable. So inside the opeator, - // we add the thrreadID, BlockId,... (which is equivalent of i) - //to the seed and construct the unique m_state per thead similar to cuda. + // Therefore, the thread Id injection is not available at this stage. However when the operator() + //is called the thread ID will be avilable. So inside the opeator, + // we add the thrreadID, BlockId,... (which is equivalent of i) + //to the seed and construct the unique m_state per thead similar to cuda. m_exec_once =false; #endif } diff --git a/unsupported/test/cxx11_tensor_random.cpp b/unsupported/test/cxx11_tensor_random.cpp index 4740d5811..b9d4c5584 100644 --- a/unsupported/test/cxx11_tensor_random.cpp +++ b/unsupported/test/cxx11_tensor_random.cpp @@ -11,9 +11,10 @@ #include +template static void test_default() { - Tensor vec(6); + Tensor vec(6); vec.setRandom(); // Fixme: we should check that the generated numbers follow a uniform @@ -23,10 +24,11 @@ static void test_default() } } +template static void test_normal() { - Tensor vec(6); - vec.setRandom>(); + Tensor vec(6); + vec.template setRandom>(); // Fixme: we should check that the generated numbers follow a gaussian // distribution instead. @@ -72,7 +74,13 @@ static void test_custom() EIGEN_DECLARE_TEST(cxx11_tensor_random) { - CALL_SUBTEST(test_default()); - CALL_SUBTEST(test_normal()); + CALL_SUBTEST((test_default())); + CALL_SUBTEST((test_normal())); + CALL_SUBTEST((test_default())); + CALL_SUBTEST((test_normal())); + CALL_SUBTEST((test_default())); + CALL_SUBTEST((test_normal())); + CALL_SUBTEST((test_default())); + CALL_SUBTEST((test_normal())); CALL_SUBTEST(test_custom()); }