arm: Add vld1_bf16 + vld1q_bf16 intrinsics

gcc/ChangeLog

2020-10-29  Andrea Corallo  <andrea.corallo@arm.com>

	* config/arm/arm-builtins.c (VAR14): Define macro.
	* config/arm/arm_neon_builtins.def: Touch for:
	__builtin_neon_vld1v4bf, __builtin_neon_vld1v8bf.
	* config/arm/arm_neon.h (vld1_bf16, vld1q_bf16): Add intrinsics.

gcc/testsuite/ChangeLog

2020-10-29  Andrea Corallo  <andrea.corallo@arm.com>

	* gcc.target/arm/simd/vld1_bf16_1.c: New test.
This commit is contained in:
Andrea Corallo 2020-10-29 13:56:17 +01:00
parent d65303b699
commit 890076673d
4 changed files with 49 additions and 2 deletions

View File

@ -946,6 +946,9 @@ typedef struct {
#define VAR13(T, N, A, B, C, D, E, F, G, H, I, J, K, L, M) \
VAR12 (T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
VAR1 (T, N, M)
#define VAR14(T, N, A, B, C, D, E, F, G, H, I, J, K, L, M, O) \
VAR13 (T, N, A, B, C, D, E, F, G, H, I, J, K, L, M) \
VAR1 (T, N, O)
/* The builtin data can be found in arm_neon_builtins.def, arm_vfp_builtins.def
and arm_acle_builtins.def. The entries in arm_neon_builtins.def require

View File

@ -19557,6 +19557,20 @@ vst4q_bf16 (bfloat16_t * __ptr, bfloat16x8x4_t __val)
return __builtin_neon_vst4v8bf (__ptr, __bu.__o);
}
__extension__ extern __inline bfloat16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_bf16 (bfloat16_t const * __ptr)
{
return __builtin_neon_vld1v4bf (__ptr);
}
__extension__ extern __inline bfloat16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_bf16 (const bfloat16_t * __ptr)
{
return __builtin_neon_vld1v8bf (__ptr);
}
__extension__ extern __inline bfloat16x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld2_bf16 (bfloat16_t const * __ptr)

View File

@ -310,8 +310,9 @@ VAR1 (TERNOP, vtbx1, v8qi)
VAR1 (TERNOP, vtbx2, v8qi)
VAR1 (TERNOP, vtbx3, v8qi)
VAR1 (TERNOP, vtbx4, v8qi)
VAR12 (LOAD1, vld1,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR14 (LOAD1, vld1,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di,
v4bf, v8bf)
VAR12 (LOAD1LANE, vld1_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf)
VAR10 (LOAD1, vld1_dup,

View File

@ -0,0 +1,29 @@
/* { dg-do assemble } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
/* { dg-additional-options "-save-temps -O2 -mfloat-abi=hard" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "arm_neon.h"
/*
**test_vld1_bf16:
** vld1.16 {d0}, \[r0\]
** bx lr
*/
bfloat16x4_t
test_vld1_bf16 (bfloat16_t const *p)
{
return vld1_bf16 (p);
}
/*
**test_vld1q_bf16:
** vld1.16 {d0-d1}, \[r0\]
** bx lr
*/
bfloat16x8_t
test_vld1q_bf16 (bfloat16_t const *p)
{
return vld1q_bf16 (p);
}