aarch64: Add bfloat16 vldN_lane_bf16 + vldNq_lane_bf16 intrisics

gcc/ChangeLog

2020-10-15  Andrea Corallo  <andrea.corallo@arm.com>

	* config/aarch64/arm_neon.h (__LD2_LANE_FUNC, __LD3_LANE_FUNC)
	(__LD4_LANE_FUNC): Rename the macro generating the 'q' variants
	into __LD2Q_LANE_FUNC, __LD2Q_LANE_FUNC, __LD2Q_LANE_FUNC so they
	all can be undefed at the and of the file.
	(vld2_lane_bf16, vld2q_lane_bf16, vld3_lane_bf16, vld3q_lane_bf16)
	(vld4_lane_bf16, vld4q_lane_bf16): Add new intrinsics.

gcc/testsuite/ChangeLog

2020-10-15  Andrea Corallo  <andrea.corallo@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_1.c: New
	testcase.
	* gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c:
	Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_bf16_indices_1.c:
	Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_bf16_indices_1.c:
	Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_bf16_indices_1.c:
	Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_bf16_indices_1.c:
	Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_bf16_indices_1.c:
	Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_bf16_indices_1.c:
	Likewise.
This commit is contained in:
Andrea Corallo 2020-10-15 10:16:18 +02:00
parent 31643fa3e9
commit 44e570d9fb
9 changed files with 289 additions and 57 deletions

View File

@ -20848,11 +20848,9 @@ __LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, s
__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di,
u64, int64x2_t)
#undef __LD2_LANE_FUNC
/* vld2q_lane */
#define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
#define __LD2Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ extern __inline intype \
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
@ -20868,22 +20866,20 @@ vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
__LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD2_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
__LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
__LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
#undef __LD2_LANE_FUNC
__LD2Q_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD2Q_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD2Q_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
__LD2Q_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD2Q_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD2Q_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD2Q_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD2Q_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD2Q_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
__LD2Q_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
__LD2Q_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD2Q_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD2Q_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD2Q_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
/* vld3_lane */
@ -20947,11 +20943,9 @@ __LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, s
__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di,
u64, int64x2_t)
#undef __LD3_LANE_FUNC
/* vld3q_lane */
#define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
#define __LD3Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ extern __inline intype \
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
@ -20969,22 +20963,20 @@ vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
__LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD3_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
__LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
__LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
#undef __LD3_LANE_FUNC
__LD3Q_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD3Q_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD3Q_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
__LD3Q_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD3Q_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD3Q_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD3Q_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD3Q_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD3Q_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
__LD3Q_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
__LD3Q_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD3Q_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD3Q_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD3Q_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
/* vld4_lane */
@ -21056,11 +21048,9 @@ __LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, s
__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di,
u64, int64x2_t)
#undef __LD4_LANE_FUNC
/* vld4q_lane */
#define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
#define __LD4Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ extern __inline intype \
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
@ -21080,22 +21070,20 @@ vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
__LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD4_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
__LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
__LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)
#undef __LD4_LANE_FUNC
__LD4Q_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD4Q_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD4Q_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
__LD4Q_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD4Q_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD4Q_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD4Q_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD4Q_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD4Q_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
__LD4Q_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
__LD4Q_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD4Q_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD4Q_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD4Q_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)
/* vmax */
@ -35752,6 +35740,15 @@ vcopyq_laneq_bf16 (bfloat16x8_t __a, const int __lane1,
__a, __lane1);
}
__LD2_LANE_FUNC (bfloat16x4x2_t, bfloat16x4_t, bfloat16x8x2_t, bfloat16_t, v4bf,
v8bf, bf, bf16, bfloat16x8_t)
__LD2Q_LANE_FUNC (bfloat16x8x2_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
__LD3_LANE_FUNC (bfloat16x4x3_t, bfloat16x4_t, bfloat16x8x3_t, bfloat16_t, v4bf,
v8bf, bf, bf16, bfloat16x8_t)
__LD3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
__LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf,
v8bf, bf, bf16, bfloat16x8_t)
__LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
#pragma GCC pop_options
/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */
@ -35965,4 +35962,11 @@ vaddq_p128 (poly128_t __a, poly128_t __b)
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64
#undef __LD2_LANE_FUNC
#undef __LD2Q_LANE_FUNC
#undef __LD3_LANE_FUNC
#undef __LD3Q_LANE_FUNC
#undef __LD4_LANE_FUNC
#undef __LD4Q_LANE_FUNC
#endif

View File

@ -0,0 +1,74 @@
/* { dg-do run { target { aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
#include <arm_neon.h>
extern void abort (void);
typedef union
{
bfloat16_t bf16;
uint16_t u16;
} bfloat16_u_t;
#define VARIANTS(VARIANT, STRUCT) \
VARIANT (bfloat16, , 4, _bf16, 3, STRUCT) \
VARIANT (bfloat16, q, 8, _bf16, 7, STRUCT)
#define TESTMETH(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
int \
test_vld##STRUCT##Q##_lane##SUFFIX (const bfloat16_u_t *data, \
const bfloat16_u_t *overwrite) \
{ \
BASE##x##ELTS##x##STRUCT##_t vectors; \
bfloat16_u_t temp[ELTS]; \
int i,j; \
for (i = 0; i < STRUCT; i++, data += ELTS) \
vectors.val[i] = vld1##Q##SUFFIX ((bfloat16_t *)data); \
vectors = vld##STRUCT##Q##_lane##SUFFIX ((bfloat16_t *) overwrite, \
vectors, LANE); \
while (--i >= 0) \
{ \
vst1##Q##SUFFIX ((bfloat16_t *)temp, vectors.val[i]); \
data -= ELTS; /* Point at value loaded before vldN_lane. */ \
for (j = 0; j < ELTS; j++) \
if (temp[j].u16 != (j == LANE ? overwrite[i].u16 : data[j].u16)) \
return 1; \
} \
return 0; \
}
/* Tests of vld2_lane and vld2q_lane. */
VARIANTS (TESTMETH, 2)
/* Tests of vld3_lane and vld3q_lane. */
VARIANTS (TESTMETH, 3)
/* Tests of vld4_lane and vld4q_lane. */
VARIANTS (TESTMETH, 4)
#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
if (test_vld##STRUCT##Q##_lane##SUFFIX ((const bfloat16_u_t *)orig_data, \
BASE##_data) != 0) \
abort ();
int
main (int argc, char **argv)
{
/* Original data for all vector formats. */
uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL,
0x012389ab4567cdefULL, 0xdeeddadacafe0431ULL,
0x1032547698badcfeULL, 0xbadbadbadbad0badULL,
0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL};
/* Data with which vldN_lane will overwrite some of previous. */
bfloat16_u_t bfloat16_data[4];
bfloat16_data[0].u16 = 0xABAB;
bfloat16_data[1].u16 = 0x0;
bfloat16_data[2].u16 = 0xCAFE;
bfloat16_data[3].u16 = 0x1234;
VARIANTS (CHECK, 2);
VARIANTS (CHECK, 3);
VARIANTS (CHECK, 4);
return 0;
}

View File

@ -0,0 +1,52 @@
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
/* { dg-additional-options "-O2 --save-temps" } */
#include <arm_neon.h>
bfloat16x4x2_t
test_vld2_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x2_t b)
{
return vld2_lane_bf16 (ptr, b, 2);
}
bfloat16x8x2_t
test_vld2q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x2_t b)
{
return vld2q_lane_bf16 (ptr, b, 2);
}
/* { dg-final { scan-assembler-times "ld2\\t{v2.h - v3.h}\\\[2\\\], \\\[x0\\\]" 2 } } */
bfloat16x4x3_t
test_vld3_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x3_t b)
{
return vld3_lane_bf16 (ptr, b, 2);
}
/* { dg-final { scan-assembler-times "ld3\t{v4.h - v6.h}\\\[2\\\], \\\[x0\\\]" 1 } } */
bfloat16x8x3_t
test_vld3q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x3_t b)
{
return vld3q_lane_bf16 (ptr, b, 2);
}
/* { dg-final { scan-assembler-times "ld3\t{v1.h - v3.h}\\\[2\\\], \\\[x0\\\]" 1 } } */
bfloat16x4x4_t
test_vld4_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x4_t b)
{
return vld4_lane_bf16 (ptr, b, 2);
}
/* { dg-final { scan-assembler-times "ld4\t{v4.h - v7.h}\\\[2\\\], \\\[x0\\\]" 1 } } */
bfloat16x8x4_t
test_vld4q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x4_t b)
{
return vld4q_lane_bf16 (ptr, b, 2);
}
/* { dg-final { scan-assembler-times "ld4\t{v0.h - v3.h}\\\[2\\\], \\\[x0\\\]" 1 } } */

View File

@ -0,0 +1,17 @@
/* { dg-do compile { target { aarch64*-*-* } } } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
#include <arm_neon.h>
bfloat16x4x2_t
f_vld2_lane_bf16 (bfloat16_t * p, bfloat16x4x2_t v)
{
bfloat16x4x2_t res;
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
res = vld2_lane_bf16 (p, v, 4);
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
res = vld2_lane_bf16 (p, v, -1);
return res;
}

View File

@ -0,0 +1,17 @@
/* { dg-do compile { target { aarch64*-*-* } } } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
#include <arm_neon.h>
bfloat16x8x2_t
f_vld2q_lane_bf16 (bfloat16_t * p, bfloat16x8x2_t v)
{
bfloat16x8x2_t res;
/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
res = vld2q_lane_bf16 (p, v, 8);
/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
res = vld2q_lane_bf16 (p, v, -1);
return res;
}

View File

@ -0,0 +1,17 @@
/* { dg-do compile { target { aarch64*-*-* } } } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
#include <arm_neon.h>
bfloat16x4x3_t
f_vld3_lane_bf16 (bfloat16_t * p, bfloat16x4x3_t v)
{
bfloat16x4x3_t res;
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
res = vld3_lane_bf16 (p, v, 4);
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
res = vld3_lane_bf16 (p, v, -1);
return res;
}

View File

@ -0,0 +1,17 @@
/* { dg-do compile { target { aarch64*-*-* } } } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
#include <arm_neon.h>
bfloat16x8x3_t
f_vld3q_lane_bf16 (bfloat16_t * p, bfloat16x8x3_t v)
{
bfloat16x8x3_t res;
/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
res = vld3q_lane_bf16 (p, v, 8);
/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
res = vld3q_lane_bf16 (p, v, -1);
return res;
}

View File

@ -0,0 +1,17 @@
/* { dg-do compile { target { aarch64*-*-* } } } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
#include <arm_neon.h>
bfloat16x4x4_t
f_vld4_lane_bf16 (bfloat16_t * p, bfloat16x4x4_t v)
{
bfloat16x4x4_t res;
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
res = vld4_lane_bf16 (p, v, 4);
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
res = vld4_lane_bf16 (p, v, -1);
return res;
}

View File

@ -0,0 +1,17 @@
/* { dg-do compile { target { aarch64*-*-* } } } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
#include <arm_neon.h>
bfloat16x8x4_t
f_vld4q_lane_bf16 (bfloat16_t * p, bfloat16x8x4_t v)
{
bfloat16x8x4_t res;
/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
res = vld4q_lane_bf16 (p, v, 8);
/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
res = vld4q_lane_bf16 (p, v, -1);
return res;
}