mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-15 12:41:03 +08:00
aarch64: ACLE intrinsics convert BF16 to Float32
This patch enables intrinsics to convert BFloat16 scalar and vector operands to Float32 modes. The intrinsics are implemented by shifting each BFloat16 item 16 bits to left using shl/shll/shll2 instructions. gcc/ChangeLog: 2020-11-03 Dennis Zhang <dennis.zhang@arm.com> * config/aarch64/aarch64-simd-builtins.def(vbfcvt): New entry. (vbfcvt_high, bfcvt): Likewise. * config/aarch64/aarch64-simd.md(aarch64_vbfcvt<mode>): New entry. (aarch64_vbfcvt_highv8bf, aarch64_bfcvtsf): Likewise. * config/aarch64/arm_bf16.h (vcvtah_f32_bf16): New intrinsic. * config/aarch64/arm_neon.h (vcvt_f32_bf16): Likewise. (vcvtq_low_f32_bf16, vcvtq_high_f32_bf16): Likewise. gcc/testsuite/ChangeLog * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c (test_vcvt_f32_bf16, test_vcvtq_low_f32_bf16): New tests. (test_vcvtq_high_f32_bf16, test_vcvth_f32_bf16): Likewise.
This commit is contained in:
parent
9d1b813d0f
commit
f7d6961126
@ -1,3 +1,13 @@
|
||||
2020-11-03 Dennis Zhang <dennis.zhang@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-simd-builtins.def(vbfcvt): New entry.
|
||||
(vbfcvt_high, bfcvt): Likewise.
|
||||
* config/aarch64/aarch64-simd.md(aarch64_vbfcvt<mode>): New entry.
|
||||
(aarch64_vbfcvt_highv8bf, aarch64_bfcvtsf): Likewise.
|
||||
* config/aarch64/arm_bf16.h (vcvtah_f32_bf16): New intrinsic.
|
||||
* config/aarch64/arm_neon.h (vcvt_f32_bf16): Likewise.
|
||||
(vcvtq_low_f32_bf16, vcvtq_high_f32_bf16): Likewise.
|
||||
|
||||
2020-11-02 Alan Modra <amodra@gmail.com>
|
||||
|
||||
PR middle-end/97267
|
||||
|
@ -732,3 +732,8 @@
|
||||
VAR1 (UNOP, bfcvtn_q, 0, FP, v8bf)
|
||||
VAR1 (BINOP, bfcvtn2, 0, FP, v8bf)
|
||||
VAR1 (UNOP, bfcvt, 0, FP, bf)
|
||||
|
||||
/* Implemented by aarch64_{v}bfcvt{_high}<mode>. */
|
||||
VAR2 (UNOP, vbfcvt, 0, AUTO_FP, v4bf, v8bf)
|
||||
VAR1 (UNOP, vbfcvt_high, 0, AUTO_FP, v8bf)
|
||||
VAR1 (UNOP, bfcvt, 0, AUTO_FP, sf)
|
||||
|
@ -7238,3 +7238,31 @@
|
||||
"bfcvt\\t%h0, %s1"
|
||||
[(set_attr "type" "f_cvt")]
|
||||
)
|
||||
|
||||
;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
|
||||
(define_insn "aarch64_vbfcvt<mode>"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=w")
|
||||
(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
|
||||
UNSPEC_BFCVTN))]
|
||||
"TARGET_BF16_SIMD"
|
||||
"shll\\t%0.4s, %1.4h, #16"
|
||||
[(set_attr "type" "neon_shift_imm_long")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_vbfcvt_highv8bf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=w")
|
||||
(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
|
||||
UNSPEC_BFCVTN2))]
|
||||
"TARGET_BF16_SIMD"
|
||||
"shll2\\t%0.4s, %1.8h, #16"
|
||||
[(set_attr "type" "neon_shift_imm_long")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_bfcvtsf"
|
||||
[(set (match_operand:SF 0 "register_operand" "=w")
|
||||
(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
|
||||
UNSPEC_BFCVT))]
|
||||
"TARGET_BF16_FP"
|
||||
"shl\\t%d0, %d1, #16"
|
||||
[(set_attr "type" "neon_shift_imm")]
|
||||
)
|
||||
|
@ -40,6 +40,13 @@ vcvth_bf16_f32 (float32_t __a)
|
||||
return __builtin_aarch64_bfcvtbf (__a);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcvtah_f32_bf16 (bfloat16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_bfcvtsf (__a);
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
||||
#endif
|
||||
|
@ -35680,6 +35680,27 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
|
||||
return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcvt_f32_bf16 (bfloat16x4_t __a)
|
||||
{
|
||||
return __builtin_aarch64_vbfcvtv4bf (__a);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcvtq_low_f32_bf16 (bfloat16x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_vbfcvtv8bf (__a);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcvtq_high_f32_bf16 (bfloat16x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_vbfcvt_highv8bf (__a);
|
||||
}
|
||||
|
||||
__extension__ extern __inline bfloat16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcvt_bf16_f32 (float32x4_t __a)
|
||||
|
@ -1,3 +1,9 @@
|
||||
2020-11-03 Dennis Zhang <dennis.zhang@arm.com>
|
||||
|
||||
* gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
|
||||
(test_vcvt_f32_bf16, test_vcvtq_low_f32_bf16): New tests.
|
||||
(test_vcvtq_high_f32_bf16, test_vcvth_f32_bf16): Likewise.
|
||||
|
||||
2020-11-02 Alan Modra <amodra@gmail.com>
|
||||
|
||||
PR middle-end/97267
|
||||
|
@ -46,3 +46,43 @@ bfloat16_t test_bfcvt (float32_t a)
|
||||
{
|
||||
return vcvth_bf16_f32 (a);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vcvt_f32_bf16:
|
||||
** shll v0.4s, v0.4h, #16
|
||||
** ret
|
||||
*/
|
||||
float32x4_t test_vcvt_f32_bf16 (bfloat16x4_t a)
|
||||
{
|
||||
return vcvt_f32_bf16 (a);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vcvtq_low_f32_bf16:
|
||||
** shll v0.4s, v0.4h, #16
|
||||
** ret
|
||||
*/
|
||||
float32x4_t test_vcvtq_low_f32_bf16 (bfloat16x8_t a)
|
||||
{
|
||||
return vcvtq_low_f32_bf16 (a);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vcvtq_high_f32_bf16:
|
||||
** shll2 v0.4s, v0.8h, #16
|
||||
** ret
|
||||
*/
|
||||
float32x4_t test_vcvtq_high_f32_bf16 (bfloat16x8_t a)
|
||||
{
|
||||
return vcvtq_high_f32_bf16 (a);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vcvtah_f32_bf16:
|
||||
** shl d0, d0, #16
|
||||
** ret
|
||||
*/
|
||||
float32_t test_vcvtah_f32_bf16 (bfloat16_t a)
|
||||
{
|
||||
return vcvtah_f32_bf16 (a);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user