mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 22:11:30 +08:00
aarch64: intrinsics extract half of bf16 vector
This patch implements ACLE intrinsics vget_low_bf16 and vget_high_bf16 to extract lower or higher half from a bfloat16x8 vector. The vget_high_bf16 is done by 'dup' instruction. The vget_low_bf16 is just to return the lower half of a vector register. Tests include both big- and little-endian cases. gcc/ChangeLog: 2020-11-03 Dennis Zhang <dennis.zhang@arm.com> * config/aarch64/aarch64-simd-builtins.def (vget_lo_half): New entry. (vget_hi_half): Likewise. * config/aarch64/aarch64-simd.md (aarch64_vget_lo_halfv8bf): New entry. (aarch64_vget_hi_halfv8bf): Likewise. * config/aarch64/arm_neon.h (vget_low_bf16): New intrinsic. (vget_high_bf16): Likewise. gcc/testsuite/ChangeLog * gcc.target/aarch64/advsimd-intrinsics/bf16_get.c: New test. * gcc.target/aarch64/advsimd-intrinsics/bf16_get-be.c: New test.
This commit is contained in:
parent
cee45e4912
commit
3553c65853
@ -1,3 +1,12 @@
|
||||
2020-11-03 Dennis Zhang <dennis.zhang@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-simd-builtins.def (vget_lo_half): New entry.
|
||||
(vget_hi_half): Likewise.
|
||||
* config/aarch64/aarch64-simd.md (aarch64_vget_lo_halfv8bf): New entry.
|
||||
(aarch64_vget_hi_halfv8bf): Likewise.
|
||||
* config/aarch64/arm_neon.h (vget_low_bf16): New intrinsic.
|
||||
(vget_high_bf16): Likewise.
|
||||
|
||||
2020-11-03 Bernd Edlinger <bernd.edlinger@hotmail.de>
|
||||
|
||||
PR target/97205
|
||||
|
@ -722,6 +722,10 @@
|
||||
VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, ALL, v4sf)
|
||||
VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, ALL, v4sf)
|
||||
|
||||
/* Implemented by aarch64_vget_lo/hi_halfv8bf. */
|
||||
VAR1 (UNOP, vget_lo_half, 0, AUTO_FP, v8bf)
|
||||
VAR1 (UNOP, vget_hi_half, 0, AUTO_FP, v8bf)
|
||||
|
||||
/* Implemented by aarch64_simd_<sur>mmlav16qi. */
|
||||
VAR1 (TERNOP, simd_smmla, 0, NONE, v16qi)
|
||||
VAR1 (TERNOPU, simd_ummla, 0, NONE, v16qi)
|
||||
|
@ -7159,6 +7159,27 @@
|
||||
[(set_attr "type" "neon_dot<VDQSF:q>")]
|
||||
)
|
||||
|
||||
;; vget_low/high_bf16
|
||||
(define_expand "aarch64_vget_lo_halfv8bf"
|
||||
[(match_operand:V4BF 0 "register_operand")
|
||||
(match_operand:V8BF 1 "register_operand")]
|
||||
"TARGET_BF16_SIMD"
|
||||
{
|
||||
rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
|
||||
emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "aarch64_vget_hi_halfv8bf"
|
||||
[(match_operand:V4BF 0 "register_operand")
|
||||
(match_operand:V8BF 1 "register_operand")]
|
||||
"TARGET_BF16_SIMD"
|
||||
{
|
||||
rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
|
||||
emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; bfmmla
|
||||
(define_insn "aarch64_bfmmlaqv4sf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=w")
|
||||
|
@ -35680,6 +35680,20 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
|
||||
return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
__extension__ extern __inline bfloat16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vget_low_bf16 (bfloat16x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_vget_lo_halfv8bf (__a);
|
||||
}
|
||||
|
||||
__extension__ extern __inline bfloat16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vget_high_bf16 (bfloat16x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_vget_hi_halfv8bf (__a);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcvt_f32_bf16 (bfloat16x4_t __a)
|
||||
|
@ -1,3 +1,8 @@
|
||||
2020-11-03 Dennis Zhang <dennis.zhang@arm.com>
|
||||
|
||||
* gcc.target/aarch64/advsimd-intrinsics/bf16_get.c: New test.
|
||||
* gcc.target/aarch64/advsimd-intrinsics/bf16_get-be.c: New test.
|
||||
|
||||
2020-11-03 Bernd Edlinger <bernd.edlinger@hotmail.de>
|
||||
|
||||
PR target/97205
|
||||
|
@ -0,0 +1,27 @@
|
||||
/* { dg-do assemble { target { aarch64*-*-* } } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
/* { dg-additional-options "-mbig-endian -save-temps" } */
|
||||
/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*
|
||||
**test_vget_low_bf16:
|
||||
** ret
|
||||
*/
|
||||
bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a)
|
||||
{
|
||||
return vget_low_bf16 (a);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vget_high_bf16:
|
||||
** dup d0, v0.d\[1\]
|
||||
** ret
|
||||
*/
|
||||
bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a)
|
||||
{
|
||||
return vget_high_bf16 (a);
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
/* { dg-do assemble { target { aarch64*-*-* } } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
/* { dg-additional-options "-save-temps" } */
|
||||
/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*
|
||||
**test_vget_low_bf16:
|
||||
** ret
|
||||
*/
|
||||
bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a)
|
||||
{
|
||||
return vget_low_bf16 (a);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vget_high_bf16:
|
||||
** dup d0, v0.d\[1\]
|
||||
** ret
|
||||
*/
|
||||
bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a)
|
||||
{
|
||||
return vget_high_bf16 (a);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user