aarch64: ACLE intrinsics convert BF16 to Float32

This patch enables intrinsics to convert BFloat16 scalar and vector
operands to Float32 modes. The intrinsics are implemented by shifting
each BFloat16 item 16 bits to left using shl/shll/shll2 instructions.

gcc/ChangeLog:

2020-11-03  Dennis Zhang  <dennis.zhang@arm.com>

	* config/aarch64/aarch64-simd-builtins.def(vbfcvt): New entry.
	(vbfcvt_high, bfcvt): Likewise.
	* config/aarch64/aarch64-simd.md(aarch64_vbfcvt<mode>): New entry.
	(aarch64_vbfcvt_highv8bf, aarch64_bfcvtsf): Likewise.
	* config/aarch64/arm_bf16.h (vcvtah_f32_bf16): New intrinsic.
	* config/aarch64/arm_neon.h (vcvt_f32_bf16): Likewise.
	(vcvtq_low_f32_bf16, vcvtq_high_f32_bf16): Likewise.

gcc/testsuite/ChangeLog

	* gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
	(test_vcvt_f32_bf16, test_vcvtq_low_f32_bf16): New tests.
	(test_vcvtq_high_f32_bf16, test_vcvth_f32_bf16): Likewise.
This commit is contained in:
Dennis Zhang 2020-11-03 13:00:51 +00:00
parent 9d1b813d0f
commit f7d6961126
7 changed files with 117 additions and 0 deletions

View File

@ -1,3 +1,13 @@
2020-11-03 Dennis Zhang <dennis.zhang@arm.com>
* config/aarch64/aarch64-simd-builtins.def(vbfcvt): New entry.
(vbfcvt_high, bfcvt): Likewise.
* config/aarch64/aarch64-simd.md(aarch64_vbfcvt<mode>): New entry.
(aarch64_vbfcvt_highv8bf, aarch64_bfcvtsf): Likewise.
* config/aarch64/arm_bf16.h (vcvtah_f32_bf16): New intrinsic.
* config/aarch64/arm_neon.h (vcvt_f32_bf16): Likewise.
(vcvtq_low_f32_bf16, vcvtq_high_f32_bf16): Likewise.
2020-11-02 Alan Modra <amodra@gmail.com>
PR middle-end/97267

View File

@ -732,3 +732,8 @@
VAR1 (UNOP, bfcvtn_q, 0, FP, v8bf)
VAR1 (BINOP, bfcvtn2, 0, FP, v8bf)
VAR1 (UNOP, bfcvt, 0, FP, bf)
/* Implemented by aarch64_{v}bfcvt{_high}<mode>. */
VAR2 (UNOP, vbfcvt, 0, AUTO_FP, v4bf, v8bf)
VAR1 (UNOP, vbfcvt_high, 0, AUTO_FP, v8bf)
VAR1 (UNOP, bfcvt, 0, AUTO_FP, sf)

View File

@ -7238,3 +7238,31 @@
"bfcvt\\t%h0, %s1"
[(set_attr "type" "f_cvt")]
)
;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
(define_insn "aarch64_vbfcvt<mode>"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
UNSPEC_BFCVTN))]
"TARGET_BF16_SIMD"
"shll\\t%0.4s, %1.4h, #16"
[(set_attr "type" "neon_shift_imm_long")]
)
(define_insn "aarch64_vbfcvt_highv8bf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
UNSPEC_BFCVTN2))]
"TARGET_BF16_SIMD"
"shll2\\t%0.4s, %1.8h, #16"
[(set_attr "type" "neon_shift_imm_long")]
)
(define_insn "aarch64_bfcvtsf"
[(set (match_operand:SF 0 "register_operand" "=w")
(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
UNSPEC_BFCVT))]
"TARGET_BF16_FP"
"shl\\t%d0, %d1, #16"
[(set_attr "type" "neon_shift_imm")]
)

View File

@ -40,6 +40,13 @@ vcvth_bf16_f32 (float32_t __a)
return __builtin_aarch64_bfcvtbf (__a);
}
__extension__ extern __inline float32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtah_f32_bf16 (bfloat16_t __a)
{
return __builtin_aarch64_bfcvtsf (__a);
}
#pragma GCC pop_options
#endif

View File

@ -35680,6 +35680,27 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvt_f32_bf16 (bfloat16x4_t __a)
{
return __builtin_aarch64_vbfcvtv4bf (__a);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtq_low_f32_bf16 (bfloat16x8_t __a)
{
return __builtin_aarch64_vbfcvtv8bf (__a);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtq_high_f32_bf16 (bfloat16x8_t __a)
{
return __builtin_aarch64_vbfcvt_highv8bf (__a);
}
__extension__ extern __inline bfloat16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvt_bf16_f32 (float32x4_t __a)

View File

@ -1,3 +1,9 @@
2020-11-03 Dennis Zhang <dennis.zhang@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
(test_vcvt_f32_bf16, test_vcvtq_low_f32_bf16): New tests.
(test_vcvtq_high_f32_bf16, test_vcvth_f32_bf16): Likewise.
2020-11-02 Alan Modra <amodra@gmail.com>
PR middle-end/97267

View File

@ -46,3 +46,43 @@ bfloat16_t test_bfcvt (float32_t a)
{
return vcvth_bf16_f32 (a);
}
/*
**test_vcvt_f32_bf16:
** shll v0.4s, v0.4h, #16
** ret
*/
float32x4_t test_vcvt_f32_bf16 (bfloat16x4_t a)
{
return vcvt_f32_bf16 (a);
}
/*
**test_vcvtq_low_f32_bf16:
** shll v0.4s, v0.4h, #16
** ret
*/
float32x4_t test_vcvtq_low_f32_bf16 (bfloat16x8_t a)
{
return vcvtq_low_f32_bf16 (a);
}
/*
**test_vcvtq_high_f32_bf16:
** shll2 v0.4s, v0.8h, #16
** ret
*/
float32x4_t test_vcvtq_high_f32_bf16 (bfloat16x8_t a)
{
return vcvtq_high_f32_bf16 (a);
}
/*
**test_vcvtah_f32_bf16:
** shl d0, d0, #16
** ret
*/
float32_t test_vcvtah_f32_bf16 (bfloat16_t a)
{
return vcvtah_f32_bf16 (a);
}