aarch64: ACLE intrinsics convert BF16 to Float32

This patch enables intrinsics to convert BFloat16 scalar and vector operands to Float32 modes. The intrinsics are implemented by shifting each BFloat16 item 16 bits to left using shl/shll/shll2 instructions. gcc/ChangeLog: 2020-11-03 Dennis Zhang <dennis.zhang@arm.com> * config/aarch64/aarch64-simd-builtins.def(vbfcvt): New entry. (vbfcvt_high, bfcvt): Likewise. * config/aarch64/aarch64-simd.md(aarch64_vbfcvt<mode>): New entry. (aarch64_vbfcvt_highv8bf, aarch64_bfcvtsf): Likewise. * config/aarch64/arm_bf16.h (vcvtah_f32_bf16): New intrinsic. * config/aarch64/arm_neon.h (vcvt_f32_bf16): Likewise. (vcvtq_low_f32_bf16, vcvtq_high_f32_bf16): Likewise. gcc/testsuite/ChangeLog * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c (test_vcvt_f32_bf16, test_vcvtq_low_f32_bf16): New tests. (test_vcvtq_high_f32_bf16, test_vcvth_f32_bf16): Likewise.
2025-04-15 12:41:03 +08:00 · 2020-11-03 13:00:51 +00:00 · 2020-11-03 13:00:51 +00:00 · f7d6961126
commit f7d6961126
parent 9d1b813d0f
7 changed files with 117 additions and 0 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,13 @@
+2020-11-03  Dennis Zhang  <dennis.zhang@arm.com>
+
+	* config/aarch64/aarch64-simd-builtins.def(vbfcvt): New entry.
+	(vbfcvt_high, bfcvt): Likewise.
+	* config/aarch64/aarch64-simd.md(aarch64_vbfcvt<mode>): New entry.
+	(aarch64_vbfcvt_highv8bf, aarch64_bfcvtsf): Likewise.
+	* config/aarch64/arm_bf16.h (vcvtah_f32_bf16): New intrinsic.
+	* config/aarch64/arm_neon.h (vcvt_f32_bf16): Likewise.
+	(vcvtq_low_f32_bf16, vcvtq_high_f32_bf16): Likewise.
+
 2020-11-02  Alan Modra  <amodra@gmail.com>

 	PR middle-end/97267
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@ -732,3 +732,8 @@
  VAR1 (UNOP, bfcvtn_q, 0, FP, v8bf)
  VAR1 (BINOP, bfcvtn2, 0, FP, v8bf)
  VAR1 (UNOP, bfcvt, 0, FP, bf)
+
+  /* Implemented by aarch64_{v}bfcvt{_high}<mode>.  */
+  VAR2 (UNOP, vbfcvt, 0, AUTO_FP, v4bf, v8bf)
+  VAR1 (UNOP, vbfcvt_high, 0, AUTO_FP, v8bf)
+  VAR1 (UNOP, bfcvt, 0, AUTO_FP, sf)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@ -7238,3 +7238,31 @@
  "bfcvt\\t%h0, %s1"
  [(set_attr "type" "f_cvt")]
 )
+
+;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
+(define_insn "aarch64_vbfcvt<mode>"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
+		      UNSPEC_BFCVTN))]
+  "TARGET_BF16_SIMD"
+  "shll\\t%0.4s, %1.4h, #16"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_vbfcvt_highv8bf"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
+		      UNSPEC_BFCVTN2))]
+  "TARGET_BF16_SIMD"
+  "shll2\\t%0.4s, %1.8h, #16"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_bfcvtsf"
+  [(set (match_operand:SF 0 "register_operand" "=w")
+	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
+		    UNSPEC_BFCVT))]
+  "TARGET_BF16_FP"
+  "shl\\t%d0, %d1, #16"
+  [(set_attr "type" "neon_shift_imm")]
+)
--- a/gcc/config/aarch64/arm_bf16.h
+++ b/gcc/config/aarch64/arm_bf16.h
@ -40,6 +40,13 @@ vcvth_bf16_f32 (float32_t __a)
  return __builtin_aarch64_bfcvtbf (__a);
 }

+__extension__ extern __inline float32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtah_f32_bf16 (bfloat16_t __a)
+{
+  return __builtin_aarch64_bfcvtsf (__a);
+}
+
 #pragma GCC pop_options

 #endif
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@ -35680,6 +35680,27 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
  return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
 }

+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvt_f32_bf16 (bfloat16x4_t __a)
+{
+  return __builtin_aarch64_vbfcvtv4bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_low_f32_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vbfcvtv8bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_high_f32_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vbfcvt_highv8bf (__a);
+}
+
 __extension__ extern __inline bfloat16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvt_bf16_f32 (float32x4_t __a)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,9 @@
+2020-11-03  Dennis Zhang  <dennis.zhang@arm.com>
+
+	* gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
+	(test_vcvt_f32_bf16, test_vcvtq_low_f32_bf16): New tests.
+	(test_vcvtq_high_f32_bf16, test_vcvth_f32_bf16): Likewise.
+	
 2020-11-02  Alan Modra  <amodra@gmail.com>

 	PR middle-end/97267
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
@ -46,3 +46,43 @@ bfloat16_t test_bfcvt (float32_t a)
 {
  return vcvth_bf16_f32 (a);
 }
+
+/*
+**test_vcvt_f32_bf16:
+**     shll	v0.4s, v0.4h, #16
+**     ret
+*/
+float32x4_t test_vcvt_f32_bf16 (bfloat16x4_t a)
+{
+  return vcvt_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_low_f32_bf16:
+**     shll	v0.4s, v0.4h, #16
+**     ret
+*/
+float32x4_t test_vcvtq_low_f32_bf16 (bfloat16x8_t a)
+{
+  return vcvtq_low_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_high_f32_bf16:
+**     shll2	v0.4s, v0.8h, #16
+**     ret
+*/
+float32x4_t test_vcvtq_high_f32_bf16 (bfloat16x8_t a)
+{
+  return vcvtq_high_f32_bf16 (a);
+}
+
+/*
+**test_vcvtah_f32_bf16:
+**     shl	d0, d0, #16
+**     ret
+*/
+float32_t test_vcvtah_f32_bf16 (bfloat16_t a)
+{
+  return vcvtah_f32_bf16 (a);
+}