mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-04 02:50:29 +08:00
arm: Add vld1_bf16 + vld1q_bf16 intrinsics
gcc/ChangeLog

2020-10-29  Andrea Corallo  <andrea.corallo@arm.com>

        * config/arm/arm-builtins.c (VAR14): Define macro.
        * config/arm/arm_neon_builtins.def: Touch for:
        __builtin_neon_vld1v4bf, __builtin_neon_vld1v8bf.
        * config/arm/arm_neon.h (vld1_bf16, vld1q_bf16): Add intrinsics.

gcc/testsuite/ChangeLog

2020-10-29  Andrea Corallo  <andrea.corallo@arm.com>

        * gcc.target/arm/simd/vld1_bf16_1.c: New test.
This commit is contained in:
parent
d65303b699
commit
890076673d
@ -946,6 +946,9 @@ typedef struct {
|
||||
/* Thirteen-entry variant: expands VAR12 on the first twelve trailing
   arguments (A..L) and then VAR1 on the thirteenth (M), passing T and N
   through unchanged to both.  */
#define VAR13(T, N, A, B, C, D, E, F, G, H, I, J, K, L, M) \
|
||||
VAR12 (T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
|
||||
VAR1 (T, N, M)
|
||||
/* Fourteen-entry variant: expands VAR13 on the first thirteen trailing
   arguments (A..M) and then VAR1 on the fourteenth, passing T and N through
   unchanged to both.  The fourteenth parameter is spelled O rather than N
   because N is already taken by the second parameter.  */
#define VAR14(T, N, A, B, C, D, E, F, G, H, I, J, K, L, M, O) \
|
||||
VAR13 (T, N, A, B, C, D, E, F, G, H, I, J, K, L, M) \
|
||||
VAR1 (T, N, O)
|
||||
|
||||
/* The builtin data can be found in arm_neon_builtins.def, arm_vfp_builtins.def
|
||||
and arm_acle_builtins.def. The entries in arm_neon_builtins.def require
|
||||
|
@ -19557,6 +19557,20 @@ vst4q_bf16 (bfloat16_t * __ptr, bfloat16x8x4_t __val)
|
||||
return __builtin_neon_vst4v8bf (__ptr, __bu.__o);
|
||||
}
|
||||
|
||||
/* Load a bfloat16x4_t vector from the memory at __PTR.  Thin always-inline
   wrapper that forwards __PTR to __builtin_neon_vld1v4bf and returns its
   result unchanged.  */
__extension__ extern __inline bfloat16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_bf16 (bfloat16_t const * __ptr)
|
||||
{
|
||||
return __builtin_neon_vld1v4bf (__ptr);
|
||||
}
|
||||
|
||||
/* Load a bfloat16x8_t vector from the memory at __PTR.  Thin always-inline
   wrapper that forwards __PTR to __builtin_neon_vld1v8bf and returns its
   result unchanged.  */
__extension__ extern __inline bfloat16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_bf16 (const bfloat16_t * __ptr)
|
||||
{
|
||||
return __builtin_neon_vld1v8bf (__ptr);
|
||||
}
|
||||
|
||||
__extension__ extern __inline bfloat16x4x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld2_bf16 (bfloat16_t const * __ptr)
|
||||
|
@ -310,8 +310,9 @@ VAR1 (TERNOP, vtbx1, v8qi)
|
||||
VAR1 (TERNOP, vtbx2, v8qi)
|
||||
VAR1 (TERNOP, vtbx3, v8qi)
|
||||
VAR1 (TERNOP, vtbx4, v8qi)
|
||||
VAR12 (LOAD1, vld1,
|
||||
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
|
||||
/* LOAD1 builtins named vld1, one entry per listed mode.  The two trailing
   modes, v4bf and v8bf, provide __builtin_neon_vld1v4bf and
   __builtin_neon_vld1v8bf.  */
VAR14 (LOAD1, vld1,
|
||||
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di,
|
||||
v4bf, v8bf)
|
||||
VAR12 (LOAD1LANE, vld1_lane,
|
||||
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf)
|
||||
VAR10 (LOAD1, vld1_dup,
|
||||
|
29
gcc/testsuite/gcc.target/arm/simd/vld1_bf16_1.c
Normal file
29
gcc/testsuite/gcc.target/arm/simd/vld1_bf16_1.c
Normal file
@ -0,0 +1,29 @@
|
||||
/* { dg-do assemble } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
/* { dg-additional-options "-save-temps -O2 -mfloat-abi=hard" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include "arm_neon.h"
|
||||
|
||||
/*
|
||||
**test_vld1_bf16:
|
||||
** vld1.16 {d0}, \[r0\]
|
||||
** bx lr
|
||||
*/
|
||||
/* Returns vld1_bf16 (p) directly so the generated body can be compared
   against the test_vld1_bf16 pattern in the comment above.  */
bfloat16x4_t
|
||||
test_vld1_bf16 (bfloat16_t const *p)
|
||||
{
|
||||
return vld1_bf16 (p);
|
||||
}
|
||||
|
||||
/*
|
||||
**test_vld1q_bf16:
|
||||
** vld1.16 {d0-d1}, \[r0\]
|
||||
** bx lr
|
||||
*/
|
||||
/* Returns vld1q_bf16 (p) directly so the generated body can be compared
   against the test_vld1q_bf16 pattern in the comment above.  */
bfloat16x8_t
|
||||
test_vld1q_bf16 (bfloat16_t const *p)
|
||||
{
|
||||
return vld1q_bf16 (p);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user