mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 07:40:28 +08:00
aarch64: Add bfloat16 vldN_lane_bf16 + vldNq_lane_bf16 intrinsics
gcc/ChangeLog 2020-10-15 Andrea Corallo <andrea.corallo@arm.com> * config/aarch64/arm_neon.h (__LD2_LANE_FUNC, __LD3_LANE_FUNC) (__LD4_LANE_FUNC): Rename the macro generating the 'q' variants into __LD2Q_LANE_FUNC, __LD3Q_LANE_FUNC, __LD4Q_LANE_FUNC so they all can be undefed at the end of the file. (vld2_lane_bf16, vld2q_lane_bf16, vld3_lane_bf16, vld3q_lane_bf16) (vld4_lane_bf16, vld4q_lane_bf16): Add new intrinsics. gcc/testsuite/ChangeLog 2020-10-15 Andrea Corallo <andrea.corallo@arm.com> * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_1.c: New testcase. * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_bf16_indices_1.c: Likewise.
This commit is contained in:
parent
31643fa3e9
commit
44e570d9fb
@ -20848,11 +20848,9 @@ __LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, s
|
||||
__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di,
|
||||
u64, int64x2_t)
|
||||
|
||||
#undef __LD2_LANE_FUNC
|
||||
|
||||
/* vld2q_lane */
|
||||
|
||||
#define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
|
||||
#define __LD2Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
|
||||
__extension__ extern __inline intype \
|
||||
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
|
||||
vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
|
||||
@ -20868,22 +20866,20 @@ vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
__LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
|
||||
__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
|
||||
__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
|
||||
__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
|
||||
__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
|
||||
__LD2_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64)
|
||||
__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
|
||||
__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
|
||||
__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
|
||||
__LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
|
||||
__LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
|
||||
__LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
|
||||
__LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
|
||||
__LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
|
||||
|
||||
#undef __LD2_LANE_FUNC
|
||||
__LD2Q_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
|
||||
__LD2Q_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
|
||||
__LD2Q_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
|
||||
__LD2Q_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
|
||||
__LD2Q_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
|
||||
__LD2Q_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64)
|
||||
__LD2Q_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
|
||||
__LD2Q_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
|
||||
__LD2Q_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
|
||||
__LD2Q_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
|
||||
__LD2Q_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
|
||||
__LD2Q_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
|
||||
__LD2Q_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
|
||||
__LD2Q_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
|
||||
|
||||
/* vld3_lane */
|
||||
|
||||
@ -20947,11 +20943,9 @@ __LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, s
|
||||
__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di,
|
||||
u64, int64x2_t)
|
||||
|
||||
#undef __LD3_LANE_FUNC
|
||||
|
||||
/* vld3q_lane */
|
||||
|
||||
#define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
|
||||
#define __LD3Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
|
||||
__extension__ extern __inline intype \
|
||||
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
|
||||
vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
|
||||
@ -20969,22 +20963,20 @@ vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
__LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
|
||||
__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
|
||||
__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
|
||||
__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
|
||||
__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
|
||||
__LD3_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64)
|
||||
__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
|
||||
__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
|
||||
__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
|
||||
__LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
|
||||
__LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
|
||||
__LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
|
||||
__LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
|
||||
__LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
|
||||
|
||||
#undef __LD3_LANE_FUNC
|
||||
__LD3Q_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
|
||||
__LD3Q_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
|
||||
__LD3Q_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
|
||||
__LD3Q_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
|
||||
__LD3Q_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
|
||||
__LD3Q_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64)
|
||||
__LD3Q_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
|
||||
__LD3Q_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
|
||||
__LD3Q_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
|
||||
__LD3Q_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
|
||||
__LD3Q_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
|
||||
__LD3Q_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
|
||||
__LD3Q_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
|
||||
__LD3Q_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
|
||||
|
||||
/* vld4_lane */
|
||||
|
||||
@ -21056,11 +21048,9 @@ __LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, s
|
||||
__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di,
|
||||
u64, int64x2_t)
|
||||
|
||||
#undef __LD4_LANE_FUNC
|
||||
|
||||
/* vld4q_lane */
|
||||
|
||||
#define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
|
||||
#define __LD4Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
|
||||
__extension__ extern __inline intype \
|
||||
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
|
||||
vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
|
||||
@ -21080,22 +21070,20 @@ vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
__LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
|
||||
__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
|
||||
__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
|
||||
__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
|
||||
__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
|
||||
__LD4_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64)
|
||||
__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
|
||||
__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
|
||||
__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
|
||||
__LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
|
||||
__LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
|
||||
__LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
|
||||
__LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
|
||||
__LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)
|
||||
|
||||
#undef __LD4_LANE_FUNC
|
||||
__LD4Q_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
|
||||
__LD4Q_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
|
||||
__LD4Q_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
|
||||
__LD4Q_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
|
||||
__LD4Q_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
|
||||
__LD4Q_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64)
|
||||
__LD4Q_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
|
||||
__LD4Q_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
|
||||
__LD4Q_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
|
||||
__LD4Q_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
|
||||
__LD4Q_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
|
||||
__LD4Q_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
|
||||
__LD4Q_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
|
||||
__LD4Q_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)
|
||||
|
||||
/* vmax */
|
||||
|
||||
@ -35752,6 +35740,15 @@ vcopyq_laneq_bf16 (bfloat16x8_t __a, const int __lane1,
|
||||
__a, __lane1);
|
||||
}
|
||||
|
||||
__LD2_LANE_FUNC (bfloat16x4x2_t, bfloat16x4_t, bfloat16x8x2_t, bfloat16_t, v4bf,
|
||||
v8bf, bf, bf16, bfloat16x8_t)
|
||||
__LD2Q_LANE_FUNC (bfloat16x8x2_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
|
||||
__LD3_LANE_FUNC (bfloat16x4x3_t, bfloat16x4_t, bfloat16x8x3_t, bfloat16_t, v4bf,
|
||||
v8bf, bf, bf16, bfloat16x8_t)
|
||||
__LD3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
|
||||
__LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf,
|
||||
v8bf, bf, bf16, bfloat16x8_t)
|
||||
__LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
|
||||
#pragma GCC pop_options
|
||||
|
||||
/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */
|
||||
@ -35965,4 +35962,11 @@ vaddq_p128 (poly128_t __a, poly128_t __b)
|
||||
#undef __aarch64_vdupq_laneq_u32
|
||||
#undef __aarch64_vdupq_laneq_u64
|
||||
|
||||
#undef __LD2_LANE_FUNC
|
||||
#undef __LD2Q_LANE_FUNC
|
||||
#undef __LD3_LANE_FUNC
|
||||
#undef __LD3Q_LANE_FUNC
|
||||
#undef __LD4_LANE_FUNC
|
||||
#undef __LD4Q_LANE_FUNC
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,74 @@
|
||||
/* { dg-do run { target { aarch64*-*-* } } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
/* Overlays a bfloat16_t with a uint16_t.  The test initializes and compares
   values through the u16 member rather than through bf16.  */
typedef union
|
||||
{
|
||||
bfloat16_t bf16;
|
||||
uint16_t u16;
|
||||
} bfloat16_u_t;
|
||||
|
||||
/* Expands VARIANT twice: once for the 4-element (non-q) form and once for the
   8-element (q) form.  The arguments passed are, in order: base type name,
   q suffix, element count, function suffix, lane index and STRUCT.  The lane
   indices used (3 and 7) are the last lane of each vector size.  */
#define VARIANTS(VARIANT, STRUCT) \
|
||||
VARIANT (bfloat16, , 4, _bf16, 3, STRUCT) \
|
||||
VARIANT (bfloat16, q, 8, _bf16, 7, STRUCT)
|
||||
|
||||
/* Defines test_vld<STRUCT><Q>_lane<SUFFIX> (data, overwrite).

   The generated function:
   1. loads STRUCT vectors of ELTS elements each from consecutive positions
      in DATA using vld1<Q><SUFFIX>;
   2. calls vld<STRUCT><Q>_lane<SUFFIX> on OVERWRITE with lane index LANE;
   3. walks the vectors back from the last to the first, storing each one to
      a temporary and checking that lane LANE of vector i equals overwrite[i]
      while every other lane still equals the value loaded in step 1.

   Returns 0 when every element matches and 1 on the first mismatch.  All
   comparisons are done on the u16 member of bfloat16_u_t.  */
#define TESTMETH(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
|
||||
int \
|
||||
test_vld##STRUCT##Q##_lane##SUFFIX (const bfloat16_u_t *data, \
|
||||
const bfloat16_u_t *overwrite) \
|
||||
{ \
|
||||
BASE##x##ELTS##x##STRUCT##_t vectors; \
|
||||
bfloat16_u_t temp[ELTS]; \
|
||||
int i,j; \
|
||||
for (i = 0; i < STRUCT; i++, data += ELTS) \
|
||||
vectors.val[i] = vld1##Q##SUFFIX ((bfloat16_t *)data); \
|
||||
vectors = vld##STRUCT##Q##_lane##SUFFIX ((bfloat16_t *) overwrite, \
|
||||
vectors, LANE); \
|
||||
while (--i >= 0) \
|
||||
{ \
|
||||
vst1##Q##SUFFIX ((bfloat16_t *)temp, vectors.val[i]); \
|
||||
data -= ELTS; /* Point at value loaded before vldN_lane. */ \
|
||||
for (j = 0; j < ELTS; j++) \
|
||||
if (temp[j].u16 != (j == LANE ? overwrite[i].u16 : data[j].u16)) \
|
||||
return 1; \
|
||||
} \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
/* Tests of vld2_lane and vld2q_lane. */
|
||||
VARIANTS (TESTMETH, 2)
|
||||
/* Tests of vld3_lane and vld3q_lane. */
|
||||
VARIANTS (TESTMETH, 3)
|
||||
/* Tests of vld4_lane and vld4q_lane. */
|
||||
VARIANTS (TESTMETH, 4)
|
||||
|
||||
/* Calls the test function generated by TESTMETH for the given variant,
   passing orig_data and BASE##_data (both must be in scope where the macro
   is expanded), and aborts if the function reports a mismatch.  */
#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
|
||||
if (test_vld##STRUCT##Q##_lane##SUFFIX ((const bfloat16_u_t *)orig_data, \
|
||||
BASE##_data) != 0) \
|
||||
abort ();
|
||||
|
||||
/* Runs the vldN_lane and vldNq_lane bf16 checks for N = 2, 3 and 4.  Each
   CHECK expansion aborts on a mismatch; otherwise main returns 0.  */
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
/* Original data for all vector formats. */
/* 8 x 64 bits = 32 16-bit elements, which is what the largest case
   (4 vectors of 8 elements) reads.  */
|
||||
uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL,
|
||||
0x012389ab4567cdefULL, 0xdeeddadacafe0431ULL,
|
||||
0x1032547698badcfeULL, 0xbadbadbadbad0badULL,
|
||||
0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL};
|
||||
|
||||
/* Data with which vldN_lane will overwrite some of previous. */
/* Four entries: one per vector in the largest (N = 4) case.  */
|
||||
bfloat16_u_t bfloat16_data[4];
|
||||
bfloat16_data[0].u16 = 0xABAB;
|
||||
bfloat16_data[1].u16 = 0x0;
|
||||
bfloat16_data[2].u16 = 0xCAFE;
|
||||
bfloat16_data[3].u16 = 0x1234;
|
||||
|
||||
VARIANTS (CHECK, 2);
|
||||
VARIANTS (CHECK, 3);
|
||||
VARIANTS (CHECK, 4);
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,52 @@
|
||||
/* { dg-do assemble { target { aarch64*-*-* } } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
/* { dg-additional-options "-O2 --save-temps" } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Calls vld2_lane_bf16 with a constant lane index of 2.  The scan-assembler
   directive that follows the q variant counts the ld2 instructions emitted
   for this function and for test_vld2q_lane_bf16 together.  */
bfloat16x4x2_t
|
||||
test_vld2_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x2_t b)
|
||||
{
|
||||
return vld2_lane_bf16 (ptr, b, 2);
|
||||
}
|
||||
|
||||
/* Calls vld2q_lane_bf16 with a constant lane index of 2.  Together with
   test_vld2_lane_bf16 it accounts for the two ld2 matches expected by the
   scan-assembler directive that follows.  */
bfloat16x8x2_t
|
||||
test_vld2q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x2_t b)
|
||||
{
|
||||
return vld2q_lane_bf16 (ptr, b, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "ld2\\t{v2.h - v3.h}\\\[2\\\], \\\[x0\\\]" 2 } } */
|
||||
|
||||
/* Calls vld3_lane_bf16 with a constant lane index of 2; the scan-assembler
   directive that follows expects exactly one matching ld3 instruction.  */
bfloat16x4x3_t
|
||||
test_vld3_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x3_t b)
|
||||
{
|
||||
return vld3_lane_bf16 (ptr, b, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "ld3\t{v4.h - v6.h}\\\[2\\\], \\\[x0\\\]" 1 } } */
|
||||
|
||||
/* Calls vld3q_lane_bf16 with a constant lane index of 2; the scan-assembler
   directive that follows expects exactly one matching ld3 instruction.  */
bfloat16x8x3_t
|
||||
test_vld3q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x3_t b)
|
||||
{
|
||||
return vld3q_lane_bf16 (ptr, b, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "ld3\t{v1.h - v3.h}\\\[2\\\], \\\[x0\\\]" 1 } } */
|
||||
|
||||
/* Calls vld4_lane_bf16 with a constant lane index of 2; the scan-assembler
   directive that follows expects exactly one matching ld4 instruction.  */
bfloat16x4x4_t
|
||||
test_vld4_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x4_t b)
|
||||
{
|
||||
return vld4_lane_bf16 (ptr, b, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "ld4\t{v4.h - v7.h}\\\[2\\\], \\\[x0\\\]" 1 } } */
|
||||
|
||||
/* Calls vld4q_lane_bf16 with a constant lane index of 2; the scan-assembler
   directive that follows expects exactly one matching ld4 instruction.  */
bfloat16x8x4_t
|
||||
test_vld4q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x4_t b)
|
||||
{
|
||||
return vld4q_lane_bf16 (ptr, b, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "ld4\t{v0.h - v3.h}\\\[2\\\], \\\[x0\\\]" 1 } } */
|
@ -0,0 +1,17 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Passes two out-of-range lane indices (4 and -1) to vld2_lane_bf16.  Each
   call is paired with an expected-error directive naming the valid range
   0 - 3, so the test passes only if both calls are rejected.  */
bfloat16x4x2_t
|
||||
f_vld2_lane_bf16 (bfloat16_t * p, bfloat16x4x2_t v)
|
||||
{
|
||||
bfloat16x4x2_t res;
|
||||
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
res = vld2_lane_bf16 (p, v, 4);
|
||||
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
res = vld2_lane_bf16 (p, v, -1);
|
||||
return res;
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Passes two out-of-range lane indices (8 and -1) to vld2q_lane_bf16.  Each
   call is paired with an expected-error directive naming the valid range
   0 - 7, so the test passes only if both calls are rejected.  */
bfloat16x8x2_t
|
||||
f_vld2q_lane_bf16 (bfloat16_t * p, bfloat16x8x2_t v)
|
||||
{
|
||||
bfloat16x8x2_t res;
|
||||
/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vld2q_lane_bf16 (p, v, 8);
|
||||
/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vld2q_lane_bf16 (p, v, -1);
|
||||
return res;
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Passes two out-of-range lane indices (4 and -1) to vld3_lane_bf16.  Each
   call is paired with an expected-error directive naming the valid range
   0 - 3, so the test passes only if both calls are rejected.  */
bfloat16x4x3_t
|
||||
f_vld3_lane_bf16 (bfloat16_t * p, bfloat16x4x3_t v)
|
||||
{
|
||||
bfloat16x4x3_t res;
|
||||
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
res = vld3_lane_bf16 (p, v, 4);
|
||||
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
res = vld3_lane_bf16 (p, v, -1);
|
||||
return res;
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Passes two out-of-range lane indices (8 and -1) to vld3q_lane_bf16.  Each
   call is paired with an expected-error directive naming the valid range
   0 - 7, so the test passes only if both calls are rejected.  */
bfloat16x8x3_t
|
||||
f_vld3q_lane_bf16 (bfloat16_t * p, bfloat16x8x3_t v)
|
||||
{
|
||||
bfloat16x8x3_t res;
|
||||
/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vld3q_lane_bf16 (p, v, 8);
|
||||
/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vld3q_lane_bf16 (p, v, -1);
|
||||
return res;
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Passes two out-of-range lane indices (4 and -1) to vld4_lane_bf16.  Each
   call is paired with an expected-error directive naming the valid range
   0 - 3, so the test passes only if both calls are rejected.  */
bfloat16x4x4_t
|
||||
f_vld4_lane_bf16 (bfloat16_t * p, bfloat16x4x4_t v)
|
||||
{
|
||||
bfloat16x4x4_t res;
|
||||
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
res = vld4_lane_bf16 (p, v, 4);
|
||||
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
res = vld4_lane_bf16 (p, v, -1);
|
||||
return res;
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Passes two out-of-range lane indices (8 and -1) to vld4q_lane_bf16.  Each
   call is paired with an expected-error directive naming the valid range
   0 - 7, so the test passes only if both calls are rejected.  */
bfloat16x8x4_t
|
||||
f_vld4q_lane_bf16 (bfloat16_t * p, bfloat16x8x4_t v)
|
||||
{
|
||||
bfloat16x8x4_t res;
|
||||
/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vld4q_lane_bf16 (p, v, 8);
|
||||
/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vld4q_lane_bf16 (p, v, -1);
|
||||
return res;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user