Add bit_cast for half/bfloat to/from uint16_t, fix TensorRandom

The existing `TensorRandom.h` implementation makes the assumption that `half` (`bfloat16`) has a `uint16_t` member `x` (`value`), which is not always true. This currently fails on arm64, where `x` has type `__fp16`. Added `bit_cast` specializations to allow casting to/from `uint16_t` for both `half` and `bfloat16`. Also added tests in `half_float`, `bfloat16_float`, and `cxx11_tensor_random` to catch these errors in the future.
2024-12-21 07:19:46 +08:00 · 2020-11-17 15:32:44 -08:00 · 2020-11-17 15:32:44 -08:00 · 17268b155d
commit 17268b155d
parent 41d5d5334b
6 changed files with 196 additions and 150 deletions
--- a/Eigen/src/Core/arch/Default/BFloat16.h
+++ b/Eigen/src/Core/arch/Default/BFloat16.h
@ -69,7 +69,7 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base {
  template<class T>
  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const T& val)
      : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
-  
+
  explicit EIGEN_DEVICE_FUNC bfloat16(float f)
      : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {}

@ -88,7 +88,7 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base {
    // +0.0 and -0.0 become false, everything else becomes true.
    return (value & 0x7fff) != 0;
  }
-#endif 
+#endif

 };
 } // namespace Eigen
@ -272,10 +272,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const
  return output;
 }

-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value) {
  return __bfloat16_raw(value);
 }

+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_bfloat16_as_uint16(const __bfloat16_raw& bf) {
+  return bf.value;
+}
+
 // float_to_bfloat16_rtne template specialization that does not make any
 // assumption about the value of its function argument (ff).
 template <>
@ -454,7 +458,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<fals
 // float_to_bfloat16_rtne template specialization that assumes that its function
 // argument (ff) is either a normal floating point number, or +/-infinity, or
 // zero. Used to improve the runtime performance of conversion from an integer
-// type to bfloat16.  
+// type to bfloat16.
 template <>
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff) {
 #if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
@ -691,7 +695,17 @@ bool (isfinite)(const Eigen::bfloat16& h) {
  return (bfloat16_impl::isfinite)(h);
 }

-} // namespace numext
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src) {
+  return Eigen::bfloat16(Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src));
+}
+
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src) {
+  return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src);
+}
+
+}  // namespace numext
 }  // namespace Eigen

 #endif // EIGEN_BFLOAT16_H
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h
@ -494,6 +494,19 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_h
 #endif
 }

+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_half_as_uint16(const __half_raw& h) {
+  // HIP/CUDA/Default have a member 'x' of type uint16_t.
+  // For ARM64 native half, the member 'x' is of type __fp16, so we need to bit-cast.
+  // For SYCL, cl::sycl::half is _Float16, so cast directly.
+#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
+  return numext::bit_cast<numext::uint16_t>(h.x);
+#elif defined(SYCL_DEVICE_ONLY)
+  return numext::bit_cast<numext::uint16_t>(h);
+#else
+  return h.x;
+#endif
+}
+
 union float32_bits {
  unsigned int u;
  float f;
@ -812,10 +825,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr)
 }
 #endif

-#if defined(EIGEN_GPU_COMPILE_PHASE)
 namespace Eigen {
 namespace numext {

+#if defined(EIGEN_GPU_COMPILE_PHASE)
+
 template <>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isnan)(const Eigen::half& h) {
  return (half_impl::isnan)(h);
@ -830,8 +844,20 @@ template <>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isfinite)(const Eigen::half& h) {
  return (half_impl::isfinite)(h);
 }
-}  // namespace numext
-}  // namespace Eigen
+
 #endif

+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bit_cast<Eigen::half, uint16_t>(const uint16_t& src) {
+  return Eigen::half(Eigen::half_impl::raw_uint16_to_half(src));
+}
+
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::half>(const Eigen::half& src) {
+  return Eigen::half_impl::raw_half_as_uint16(src);
+}
+
+}  // namespace numext
+}  // namespace Eigen
+
 #endif // EIGEN_HALF_H
--- a/test/bfloat16_float.cpp
+++ b/test/bfloat16_float.cpp
@ -13,6 +13,9 @@

 #include <Eigen/src/Core/arch/Default/BFloat16.h>

+#define VERIFY_BFLOAT16_BITS_EQUAL(h, bits) \
+  VERIFY_IS_EQUAL((numext::bit_cast<numext::uint16_t>(h)), (static_cast<numext::uint16_t>(bits)))
+
 // Make sure it's possible to forward declare Eigen::bfloat16
 namespace Eigen {
 struct bfloat16;
@ -58,31 +61,45 @@ void test_conversion()
 {
  using Eigen::bfloat16_impl::__bfloat16_raw;

+  // Round-trip casts
+  VERIFY_IS_EQUAL(
+    numext::bit_cast<bfloat16>(numext::bit_cast<numext::uint16_t>(bfloat16(1.0f))),
+    bfloat16(1.0f));
+  VERIFY_IS_EQUAL(
+    numext::bit_cast<bfloat16>(numext::bit_cast<numext::uint16_t>(bfloat16(0.5f))),
+    bfloat16(0.5f));
+  VERIFY_IS_EQUAL(
+    numext::bit_cast<bfloat16>(numext::bit_cast<numext::uint16_t>(bfloat16(-0.33333f))),
+    bfloat16(-0.33333f));
+   VERIFY_IS_EQUAL(
+    numext::bit_cast<bfloat16>(numext::bit_cast<numext::uint16_t>(bfloat16(0.0f))),
+    bfloat16(0.0f));
+
  // Conversion from float.
-  VERIFY_IS_EQUAL(bfloat16(1.0f).value, 0x3f80);
-  VERIFY_IS_EQUAL(bfloat16(0.5f).value, 0x3f00);
-  VERIFY_IS_EQUAL(bfloat16(0.33333f).value, 0x3eab);
-  VERIFY_IS_EQUAL(bfloat16(3.38e38f).value, 0x7f7e);
-  VERIFY_IS_EQUAL(bfloat16(3.40e38f).value, 0x7f80);  // Becomes infinity.
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(1.0f), 0x3f80);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f), 0x3f00);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.33333f), 0x3eab);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3.38e38f), 0x7f7e);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3.40e38f), 0x7f80);  // Becomes infinity.

  // Verify round-to-nearest-even behavior.
  float val1 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c00)));
  float val2 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c01)));
  float val3 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c02)));
-  VERIFY_IS_EQUAL(bfloat16(0.5f * (val1 + val2)).value, 0x3c00);
-  VERIFY_IS_EQUAL(bfloat16(0.5f * (val2 + val3)).value, 0x3c02);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f * (val1 + val2)), 0x3c00);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f * (val2 + val3)), 0x3c02);

  // Conversion from int.
-  VERIFY_IS_EQUAL(bfloat16(-1).value, 0xbf80);
-  VERIFY_IS_EQUAL(bfloat16(0).value, 0x0000);
-  VERIFY_IS_EQUAL(bfloat16(1).value, 0x3f80);
-  VERIFY_IS_EQUAL(bfloat16(2).value, 0x4000);
-  VERIFY_IS_EQUAL(bfloat16(3).value, 0x4040);
-  VERIFY_IS_EQUAL(bfloat16(12).value, 0x4140);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(-1), 0xbf80);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0), 0x0000);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(1), 0x3f80);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(2), 0x4000);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3), 0x4040);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(12), 0x4140);

  // Conversion from bool.
-  VERIFY_IS_EQUAL(bfloat16(false).value, 0x0000);
-  VERIFY_IS_EQUAL(bfloat16(true).value, 0x3f80);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(false), 0x0000);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(true), 0x3f80);

  // Conversion to bool
  VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(3)), true);
@ -102,8 +119,8 @@ void test_conversion()
  VERIFY_IS_EQUAL(bfloat16(0.0f), bfloat16(0.0f));
  VERIFY_IS_EQUAL(bfloat16(-0.0f), bfloat16(0.0f));
  VERIFY_IS_EQUAL(bfloat16(-0.0f), bfloat16(-0.0f));
-  VERIFY_IS_EQUAL(bfloat16(0.0f).value, 0x0000);
-  VERIFY_IS_EQUAL(bfloat16(-0.0f).value, 0x8000);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.0f), 0x0000);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(-0.0f), 0x8000);

  // Flush denormals to zero
  for (float denorm = -std::numeric_limits<float>::denorm_min();
@ -117,16 +134,16 @@ void test_conversion()
    VERIFY_IS_EQUAL(bfloat16(denorm), false);

    if (std::signbit(denorm)) {
-      VERIFY_IS_EQUAL(bf_trunc.value, 0x8000);
+      VERIFY_BFLOAT16_BITS_EQUAL(bf_trunc, 0x8000);
    } else {
-      VERIFY_IS_EQUAL(bf_trunc.value, 0x0000);
+      VERIFY_BFLOAT16_BITS_EQUAL(bf_trunc, 0x0000);
    }
    bfloat16 bf_round = Eigen::bfloat16_impl::float_to_bfloat16_rtne<false>(denorm);
    VERIFY_IS_EQUAL(static_cast<float>(bf_round), 0.0f);
    if (std::signbit(denorm)) {
-      VERIFY_IS_EQUAL(bf_round.value, 0x8000);
+      VERIFY_BFLOAT16_BITS_EQUAL(bf_round, 0x8000);
    } else {
-      VERIFY_IS_EQUAL(bf_round.value, 0x0000);
+      VERIFY_BFLOAT16_BITS_EQUAL(bf_round, 0x0000);
    }
  }

@ -231,33 +248,35 @@ void test_conversion()
  VERIFY((numext::isinf)(bfloat16(__bfloat16_raw(0x7f80))));
  VERIFY((numext::isnan)(bfloat16(__bfloat16_raw(0x7fc0))));

-  VERIFY_IS_EQUAL(bfloat16(BinaryToFloat(0x0, 0xff, 0x40, 0x0)).value, 0x7fc0);
-  VERIFY_IS_EQUAL(bfloat16(BinaryToFloat(0x1, 0xff, 0x40, 0x0)).value, 0xffc0);
-  VERIFY_IS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16(
-                      BinaryToFloat(0x0, 0xff, 0x40, 0x0))
-                      .value,
-                  0x7fc0);
-  VERIFY_IS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16(
-                      BinaryToFloat(0x1, 0xff, 0x40, 0x0))
-                      .value,
-                  0xffc0);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(BinaryToFloat(0x0, 0xff, 0x40, 0x0)), 0x7fc0);
+  VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(BinaryToFloat(0x1, 0xff, 0x40, 0x0)), 0xffc0);
+  VERIFY_BFLOAT16_BITS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16(
+                               BinaryToFloat(0x0, 0xff, 0x40, 0x0)),
+                             0x7fc0);
+  VERIFY_BFLOAT16_BITS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16(
+                               BinaryToFloat(0x1, 0xff, 0x40, 0x0)),
+                             0xffc0);
 }

 void test_numtraits()
 {
-  std::cout << "epsilon       = " << NumTraits<bfloat16>::epsilon() << "  (0x" << std::hex << NumTraits<bfloat16>::epsilon().value << ")" << std::endl;
-  std::cout << "highest       = " << NumTraits<bfloat16>::highest() << "  (0x" << std::hex << NumTraits<bfloat16>::highest().value << ")" << std::endl;
-  std::cout << "lowest        = " << NumTraits<bfloat16>::lowest() << "  (0x" << std::hex << NumTraits<bfloat16>::lowest().value << ")" << std::endl;
-  std::cout << "min           = " << (std::numeric_limits<bfloat16>::min)() << "  (0x" << std::hex << (std::numeric_limits<bfloat16>::min)().value << ")" << std::endl;
-  std::cout << "denorm min    = " << (std::numeric_limits<bfloat16>::denorm_min)() << "  (0x" << std::hex << (std::numeric_limits<bfloat16>::denorm_min)().value << ")" << std::endl;
-  std::cout << "infinity      = " << NumTraits<bfloat16>::infinity() << "  (0x" << std::hex << NumTraits<bfloat16>::infinity().value << ")" << std::endl;
-  std::cout << "quiet nan     = " << NumTraits<bfloat16>::quiet_NaN() << "  (0x" << std::hex << NumTraits<bfloat16>::quiet_NaN().value << ")" << std::endl;
-  std::cout << "signaling nan = " << std::numeric_limits<bfloat16>::signaling_NaN() << "  (0x" << std::hex << std::numeric_limits<bfloat16>::signaling_NaN().value << ")" << std::endl;
+  std::cout << "epsilon       = " << NumTraits<bfloat16>::epsilon() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<bfloat16>::epsilon()) << ")" << std::endl;
+  std::cout << "highest       = " << NumTraits<bfloat16>::highest() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<bfloat16>::highest()) << ")" << std::endl;
+  std::cout << "lowest        = " << NumTraits<bfloat16>::lowest() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<bfloat16>::lowest()) << ")" << std::endl;
+  std::cout << "min           = " << (std::numeric_limits<bfloat16>::min)() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>((std::numeric_limits<bfloat16>::min)()) << ")" << std::endl;
+  std::cout << "denorm min    = " << (std::numeric_limits<bfloat16>::denorm_min)() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>((std::numeric_limits<bfloat16>::denorm_min)()) << ")" << std::endl;
+  std::cout << "infinity      = " << NumTraits<bfloat16>::infinity() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<bfloat16>::infinity()) << ")" << std::endl;
+  std::cout << "quiet nan     = " << NumTraits<bfloat16>::quiet_NaN() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<bfloat16>::quiet_NaN()) << ")" << std::endl;
+  std::cout << "signaling nan = " << std::numeric_limits<bfloat16>::signaling_NaN() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(std::numeric_limits<bfloat16>::signaling_NaN()) << ")" << std::endl;

  VERIFY(NumTraits<bfloat16>::IsSigned);

-  VERIFY_IS_EQUAL( std::numeric_limits<bfloat16>::infinity().value, bfloat16(std::numeric_limits<float>::infinity()).value );
-  VERIFY_IS_EQUAL( std::numeric_limits<bfloat16>::quiet_NaN().value, bfloat16(std::numeric_limits<float>::quiet_NaN()).value );
+  VERIFY_IS_EQUAL(
+    numext::bit_cast<numext::uint16_t>(std::numeric_limits<bfloat16>::infinity()),
+    numext::bit_cast<numext::uint16_t>(bfloat16(std::numeric_limits<float>::infinity())) );
+  VERIFY_IS_EQUAL(
+    numext::bit_cast<numext::uint16_t>(std::numeric_limits<bfloat16>::quiet_NaN()),
+    numext::bit_cast<numext::uint16_t>(bfloat16(std::numeric_limits<float>::quiet_NaN())) );
  VERIFY( (std::numeric_limits<bfloat16>::min)() > bfloat16(0.f) );
  VERIFY( (std::numeric_limits<bfloat16>::denorm_min)() > bfloat16(0.f) );
  VERIFY_IS_EQUAL( (std::numeric_limits<bfloat16>::denorm_min)()/bfloat16(2), bfloat16(0.f) );
--- a/test/half_float.cpp
+++ b/test/half_float.cpp
@ -11,6 +11,9 @@

 #include <Eigen/src/Core/arch/Default/Half.h>

+#define VERIFY_HALF_BITS_EQUAL(h, bits) \
+  VERIFY_IS_EQUAL((numext::bit_cast<numext::uint16_t>(h)), (static_cast<numext::uint16_t>(bits)))
+
 // Make sure it's possible to forward declare Eigen::half
 namespace Eigen {
 struct half;
@ -22,75 +25,51 @@ void test_conversion()
 {
  using Eigen::half_impl::__half_raw;

-  // We don't use a uint16_t raw member x if the platform has native Arm __fp16
-  // support
-#if !defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
+  // Round-trip bit-cast with uint16.
+  VERIFY_IS_EQUAL(
+    numext::bit_cast<half>(numext::bit_cast<numext::uint16_t>(half(1.0f))),
+    half(1.0f));
+  VERIFY_IS_EQUAL(
+    numext::bit_cast<half>(numext::bit_cast<numext::uint16_t>(half(0.5f))),
+    half(0.5f));
+  VERIFY_IS_EQUAL(
+    numext::bit_cast<half>(numext::bit_cast<numext::uint16_t>(half(-0.33333f))),
+    half(-0.33333f));
+   VERIFY_IS_EQUAL(
+    numext::bit_cast<half>(numext::bit_cast<numext::uint16_t>(half(0.0f))),
+    half(0.0f));
+
  // Conversion from float.
-  VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00);
-  VERIFY_IS_EQUAL(half(0.5f).x, 0x3800);
-  VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555);
-  VERIFY_IS_EQUAL(half(0.0f).x, 0x0000);
-  VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000);
-  VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff);
-  VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00);  // Becomes infinity.
+  VERIFY_HALF_BITS_EQUAL(half(1.0f), 0x3c00);
+  VERIFY_HALF_BITS_EQUAL(half(0.5f), 0x3800);
+  VERIFY_HALF_BITS_EQUAL(half(0.33333f), 0x3555);
+  VERIFY_HALF_BITS_EQUAL(half(0.0f), 0x0000);
+  VERIFY_HALF_BITS_EQUAL(half(-0.0f), 0x8000);
+  VERIFY_HALF_BITS_EQUAL(half(65504.0f), 0x7bff);
+  VERIFY_HALF_BITS_EQUAL(half(65536.0f), 0x7c00);  // Becomes infinity.

  // Denormals.
-  VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001);
-  VERIFY_IS_EQUAL(half(5.96046e-08f).x, 0x0001);
-  VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002);
+  VERIFY_HALF_BITS_EQUAL(half(-5.96046e-08f), 0x8001);
+  VERIFY_HALF_BITS_EQUAL(half(5.96046e-08f), 0x0001);
+  VERIFY_HALF_BITS_EQUAL(half(1.19209e-07f), 0x0002);

  // Verify round-to-nearest-even behavior.
  float val1 = float(half(__half_raw(0x3c00)));
  float val2 = float(half(__half_raw(0x3c01)));
  float val3 = float(half(__half_raw(0x3c02)));
-  VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, 0x3c00);
-  VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, 0x3c02);
+  VERIFY_HALF_BITS_EQUAL(half(0.5f * (val1 + val2)), 0x3c00);
+  VERIFY_HALF_BITS_EQUAL(half(0.5f * (val2 + val3)), 0x3c02);

  // Conversion from int.
-  VERIFY_IS_EQUAL(half(-1).x, 0xbc00);
-  VERIFY_IS_EQUAL(half(0).x, 0x0000);
-  VERIFY_IS_EQUAL(half(1).x, 0x3c00);
-  VERIFY_IS_EQUAL(half(2).x, 0x4000);
-  VERIFY_IS_EQUAL(half(3).x, 0x4200);
+  VERIFY_HALF_BITS_EQUAL(half(-1), 0xbc00);
+  VERIFY_HALF_BITS_EQUAL(half(0), 0x0000);
+  VERIFY_HALF_BITS_EQUAL(half(1), 0x3c00);
+  VERIFY_HALF_BITS_EQUAL(half(2), 0x4000);
+  VERIFY_HALF_BITS_EQUAL(half(3), 0x4200);

  // Conversion from bool.
-  VERIFY_IS_EQUAL(half(false).x, 0x0000);
-  VERIFY_IS_EQUAL(half(true).x, 0x3c00);
-#endif
-
-#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
-   // Conversion from float.
-  VERIFY_IS_EQUAL(half(1.0f).x, __fp16(1.0f));
-  VERIFY_IS_EQUAL(half(0.5f).x, __fp16(0.5f));
-  VERIFY_IS_EQUAL(half(0.33333f).x, __fp16(0.33333f));
-  VERIFY_IS_EQUAL(half(0.0f).x, __fp16(0.0f));
-  VERIFY_IS_EQUAL(half(-0.0f).x, __fp16(-0.0f));
-  VERIFY_IS_EQUAL(half(65504.0f).x, __fp16(65504.0f));
-  VERIFY_IS_EQUAL(half(65536.0f).x, __fp16(65536.0f));  // Becomes infinity.
-
-  // Denormals.
-  VERIFY_IS_EQUAL(half(-5.96046e-08f).x, __fp16(-5.96046e-08f));
-  VERIFY_IS_EQUAL(half(5.96046e-08f).x, __fp16(5.96046e-08f));
-  VERIFY_IS_EQUAL(half(1.19209e-07f).x, __fp16(1.19209e-07f));
-
-  // Verify round-to-nearest-even behavior.
-  float val1 = float(half(__half_raw(0x3c00)));
-  float val2 = float(half(__half_raw(0x3c01)));
-  float val3 = float(half(__half_raw(0x3c02)));
-  VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, __fp16(0.5f * (val1 + val2)));
-  VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, __fp16(0.5f * (val2 + val3)));
-
-  // Conversion from int.
-  VERIFY_IS_EQUAL(half(-1).x, __fp16(-1));
-  VERIFY_IS_EQUAL(half(0).x, __fp16(0));
-  VERIFY_IS_EQUAL(half(1).x, __fp16(1));
-  VERIFY_IS_EQUAL(half(2).x, __fp16(2));
-  VERIFY_IS_EQUAL(half(3).x, __fp16(3));
-
-  // Conversion from bool.
-  VERIFY_IS_EQUAL(half(false).x, __fp16(false));
-  VERIFY_IS_EQUAL(half(true).x, __fp16(true));
-#endif
+  VERIFY_HALF_BITS_EQUAL(half(false), 0x0000);
+  VERIFY_HALF_BITS_EQUAL(half(true), 0x3c00);

  // Conversion to float.
  VERIFY_IS_EQUAL(float(half(__half_raw(0x0000))), 0.0f);
@ -143,24 +122,27 @@ void test_conversion()

 void test_numtraits()
 {
-  std::cout << "epsilon       = " << NumTraits<half>::epsilon() << "  (0x" << std::hex << NumTraits<half>::epsilon().x << ")" << std::endl;
-  std::cout << "highest       = " << NumTraits<half>::highest() << "  (0x" << std::hex << NumTraits<half>::highest().x << ")" << std::endl;
-  std::cout << "lowest        = " << NumTraits<half>::lowest() << "  (0x" << std::hex << NumTraits<half>::lowest().x << ")" << std::endl;
-  std::cout << "min           = " << (std::numeric_limits<half>::min)() << "  (0x" << std::hex << half((std::numeric_limits<half>::min)()).x << ")" << std::endl;
-  std::cout << "denorm min    = " << (std::numeric_limits<half>::denorm_min)() << "  (0x" << std::hex << half((std::numeric_limits<half>::denorm_min)()).x << ")" << std::endl;
-  std::cout << "infinity      = " << NumTraits<half>::infinity() << "  (0x" << std::hex << NumTraits<half>::infinity().x << ")" << std::endl;
-  std::cout << "quiet nan     = " << NumTraits<half>::quiet_NaN() << "  (0x" << std::hex << NumTraits<half>::quiet_NaN().x << ")" << std::endl;
-  std::cout << "signaling nan = " << std::numeric_limits<half>::signaling_NaN() << "  (0x" << std::hex << std::numeric_limits<half>::signaling_NaN().x << ")" << std::endl;
+  std::cout << "epsilon       = " << NumTraits<half>::epsilon() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<half>::epsilon()) << ")" << std::endl;
+  std::cout << "highest       = " << NumTraits<half>::highest() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<half>::highest()) << ")" << std::endl;
+  std::cout << "lowest        = " << NumTraits<half>::lowest() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<half>::lowest()) << ")" << std::endl;
+  std::cout << "min           = " << (std::numeric_limits<half>::min)() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(half((std::numeric_limits<half>::min)())) << ")" << std::endl;
+  std::cout << "denorm min    = " << (std::numeric_limits<half>::denorm_min)() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(half((std::numeric_limits<half>::denorm_min)())) << ")" << std::endl;
+  std::cout << "infinity      = " << NumTraits<half>::infinity() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<half>::infinity()) << ")" << std::endl;
+  std::cout << "quiet nan     = " << NumTraits<half>::quiet_NaN() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<half>::quiet_NaN()) << ")" << std::endl;
+  std::cout << "signaling nan = " << std::numeric_limits<half>::signaling_NaN() << "  (0x" << std::hex << numext::bit_cast<numext::uint16_t>(std::numeric_limits<half>::signaling_NaN()) << ")" << std::endl;

  VERIFY(NumTraits<half>::IsSigned);

-  VERIFY_IS_EQUAL( std::numeric_limits<half>::infinity().x, half(std::numeric_limits<float>::infinity()).x );
+  VERIFY_IS_EQUAL(
+    numext::bit_cast<numext::uint16_t>(std::numeric_limits<half>::infinity()),
+    numext::bit_cast<numext::uint16_t>(half(std::numeric_limits<float>::infinity())) );
+  VERIFY_IS_EQUAL(
+    numext::bit_cast<numext::uint16_t>(std::numeric_limits<half>::quiet_NaN()),
+    numext::bit_cast<numext::uint16_t>(half(std::numeric_limits<float>::quiet_NaN())) );
+  VERIFY_IS_EQUAL(
+    numext::bit_cast<numext::uint16_t>(std::numeric_limits<half>::signaling_NaN()),
+    numext::bit_cast<numext::uint16_t>(half(std::numeric_limits<float>::signaling_NaN())) );

-// If we have a native fp16 types this becomes a nan == nan comparision so we have to disable it
-#if !defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
-  VERIFY_IS_EQUAL( std::numeric_limits<half>::quiet_NaN().x, half(std::numeric_limits<float>::quiet_NaN()).x );
-  VERIFY_IS_EQUAL( std::numeric_limits<half>::signaling_NaN().x, half(std::numeric_limits<float>::signaling_NaN()).x );
-#endif
  VERIFY( (std::numeric_limits<half>::min)() > half(0.f) );
  VERIFY( (std::numeric_limits<half>::denorm_min)() > half(0.f) );
  VERIFY( (std::numeric_limits<half>::min)()/half(2) > half(0.f) );
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
@ -91,24 +91,21 @@ T RandomToTypeUniform(uint64_t* state, uint64_t stream) {

 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state, uint64_t stream) {
-  Eigen::half result;
-  // Generate 10 random bits for the mantissa
+  // Generate 10 random bits for the mantissa, merge with exponent.
  unsigned rnd = PCG_XSH_RS_generator(state, stream);
-  result.x = static_cast<uint16_t>(rnd & 0x3ffu);
-  // Set the exponent
-  result.x |= (static_cast<uint16_t>(15) << 10);
+  const uint16_t half_bits = static_cast<uint16_t>(rnd & 0x3ffu) | (static_cast<uint16_t>(15) << 10);
+  Eigen::half result = Eigen::numext::bit_cast<Eigen::half>(half_bits);
  // Return the final result
  return result - Eigen::half(1.0f);
 }

 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 Eigen::bfloat16 RandomToTypeUniform<Eigen::bfloat16>(uint64_t* state, uint64_t stream) {
-  Eigen::bfloat16 result;
-  // Generate 7 random bits for the mantissa
+
+  // Generate 7 random bits for the mantissa, merge with exponent.
  unsigned rnd = PCG_XSH_RS_generator(state, stream);
-  result.value = static_cast<uint16_t>(rnd & 0x7fu);
-  // Set the exponent
-  result.value |= (static_cast<uint16_t>(127) << 7);
+  const uint16_t half_bits = static_cast<uint16_t>(rnd & 0x7fu) | (static_cast<uint16_t>(127) << 7);
+  Eigen::bfloat16 result = Eigen::numext::bit_cast<Eigen::bfloat16>(half_bits);
  // Return the final result
  return result - Eigen::bfloat16(1.0f);
 }
@ -169,19 +166,19 @@ template <typename T> class UniformRandomGenerator {
      uint64_t seed = 0) {
    m_state = PCG_XSH_RS_state(seed);
    #ifdef EIGEN_USE_SYCL
-    // In SYCL it is not possible to build PCG_XSH_RS_state in one step. 
+    // In SYCL it is not possible to build PCG_XSH_RS_state in one step.
    // Therefor, we need two step to initializate the m_state.
    // IN SYCL, the constructor of the functor is s called on the CPU
-    // and we get the clock seed here from the CPU. However, This seed is 
+    // and we get the clock seed here from the CPU. However, This seed is
    //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.
    // and only  available on the Operator() function (which is called on the GPU).
-    // Thus for CUDA (((CLOCK  + global_thread_id)* 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread 
-    // but for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread and each thread adds  
-    // the  (global_thread_id* 6364136223846793005ULL) for itself only once, in order to complete the construction 
-    // similar to CUDA Therefore, the thread Id injection is not available at this stage. 
-    //However when the operator() is called the thread ID will be avilable. So inside the opeator, 
-    // we add the thrreadID, BlockId,... (which is equivalent of i) 
-    //to the seed and construct the unique m_state per thead similar to cuda.  
+    // Thus for CUDA (((CLOCK  + global_thread_id)* 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread
+    // but for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread and each thread adds
+    // the  (global_thread_id* 6364136223846793005ULL) for itself only once, in order to complete the construction
+    // similar to CUDA Therefore, the thread Id injection is not available at this stage.
+    //However when the operator() is called the thread ID will be avilable. So inside the opeator,
+    // we add the thrreadID, BlockId,... (which is equivalent of i)
+    //to the seed and construct the unique m_state per thead similar to cuda.
    m_exec_once =false;
   #endif
  }
@ -282,16 +279,16 @@ template <typename T> class NormalRandomGenerator {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {
    m_state = PCG_XSH_RS_state(seed);
    #ifdef EIGEN_USE_SYCL
-    // In SYCL it is not possible to build PCG_XSH_RS_state in one step. 
+    // In SYCL it is not possible to build PCG_XSH_RS_state in one step.
    // Therefor, we need two steps to initializate the m_state.
    // IN SYCL, the constructor of the functor is s called on the CPU
-    // and we get the clock seed here from the CPU. However, This seed is 
+    // and we get the clock seed here from the CPU. However, This seed is
    //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.
    // and only  available on the Operator() function (which is called on the GPU).
-    // Therefore, the thread Id injection is not available at this stage. However when the operator() 
-    //is called the thread ID will be avilable. So inside the opeator, 
-    // we add the thrreadID, BlockId,... (which is equivalent of i) 
-    //to the seed and construct the unique m_state per thead similar to cuda.  
+    // Therefore, the thread Id injection is not available at this stage. However when the operator()
+    //is called the thread ID will be avilable. So inside the opeator,
+    // we add the thrreadID, BlockId,... (which is equivalent of i)
+    //to the seed and construct the unique m_state per thead similar to cuda.
    m_exec_once =false;
   #endif
  }
--- a/unsupported/test/cxx11_tensor_random.cpp
+++ b/unsupported/test/cxx11_tensor_random.cpp
@ -11,9 +11,10 @@

 #include <Eigen/CXX11/Tensor>

+template<typename Scalar>
 static void test_default()
 {
-  Tensor<float, 1> vec(6);
+  Tensor<Scalar, 1> vec(6);
  vec.setRandom();

  // Fixme: we should check that the generated numbers follow a uniform
@ -23,10 +24,11 @@ static void test_default()
  }
 }

+template<typename Scalar>
 static void test_normal()
 {
-  Tensor<float, 1> vec(6);
-  vec.setRandom<Eigen::internal::NormalRandomGenerator<float>>();
+  Tensor<Scalar, 1> vec(6);
+  vec.template setRandom<Eigen::internal::NormalRandomGenerator<Scalar>>();

  // Fixme: we should check that the generated numbers follow a gaussian
  // distribution instead.
@ -72,7 +74,13 @@ static void test_custom()

 EIGEN_DECLARE_TEST(cxx11_tensor_random)
 {
-  CALL_SUBTEST(test_default());
-  CALL_SUBTEST(test_normal());
+  CALL_SUBTEST((test_default<float>()));
+  CALL_SUBTEST((test_normal<float>()));
+  CALL_SUBTEST((test_default<double>()));
+  CALL_SUBTEST((test_normal<double>()));
+  CALL_SUBTEST((test_default<Eigen::half>()));
+  CALL_SUBTEST((test_normal<Eigen::half>()));
+  CALL_SUBTEST((test_default<Eigen::bfloat16>()));
+  CALL_SUBTEST((test_normal<Eigen::bfloat16>()));
  CALL_SUBTEST(test_custom());
 }