mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-22 15:00:55 +08:00
[AArch64] Add support for SVE HF vconds
We were missing vcond patterns that had HF comparisons and HI or HF data.

2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (SVE_HSD): New mode iterator.
	(V_FP_EQUIV, v_fp_equiv): Handle VNx8HI and VNx8HF.
	* config/aarch64/aarch64-sve.md (vcond<mode><v_fp_equiv>): Use
	SVE_HSD instead of SVE_SD.

gcc/testsuite/
	* gcc.target/aarch64/sve/vcond_17.c: New test.
	* gcc.target/aarch64/sve/vcond_17_run.c: Likewise.

From-SVN: r274420
This commit is contained in:
parent
0254ed7970
commit
a70965b114
@ -1,3 +1,10 @@
|
||||
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* config/aarch64/iterators.md (SVE_HSD): New mode iterator.
|
||||
(V_FP_EQUIV, v_fp_equiv): Handle VNx8HI and VNx8HF.
|
||||
* config/aarch64/aarch64-sve.md (vcond<mode><v_fp_equiv>): Use
|
||||
SVE_HSD instead of SVE_SD.
|
||||
|
||||
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
|
||||
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||
|
||||
|
@ -2884,13 +2884,13 @@
|
||||
;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
|
||||
;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
|
||||
(define_expand "vcond<mode><v_fp_equiv>"
|
||||
[(set (match_operand:SVE_SD 0 "register_operand")
|
||||
(if_then_else:SVE_SD
|
||||
[(set (match_operand:SVE_HSD 0 "register_operand")
|
||||
(if_then_else:SVE_HSD
|
||||
(match_operator 3 "comparison_operator"
|
||||
[(match_operand:<V_FP_EQUIV> 4 "register_operand")
|
||||
(match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
|
||||
(match_operand:SVE_SD 1 "register_operand")
|
||||
(match_operand:SVE_SD 2 "register_operand")))]
|
||||
(match_operand:SVE_HSD 1 "register_operand")
|
||||
(match_operand:SVE_HSD 2 "register_operand")))]
|
||||
"TARGET_SVE"
|
||||
{
|
||||
aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
|
||||
|
@ -301,6 +301,9 @@
|
||||
;; All SVE floating-point vector modes that have 16-bit or 32-bit elements.
|
||||
(define_mode_iterator SVE_HSF [VNx8HF VNx4SF])
|
||||
|
||||
;; All SVE vector modes that have 16-bit, 32-bit or 64-bit elements.
|
||||
(define_mode_iterator SVE_HSD [VNx8HI VNx4SI VNx2DI VNx8HF VNx4SF VNx2DF])
|
||||
|
||||
;; All SVE vector modes that have 32-bit or 64-bit elements.
|
||||
(define_mode_iterator SVE_SD [VNx4SI VNx2DI VNx4SF VNx2DF])
|
||||
|
||||
@ -928,9 +931,11 @@
|
||||
])
|
||||
|
||||
;; Floating-point equivalent of selected modes.
|
||||
(define_mode_attr V_FP_EQUIV [(VNx4SI "VNx4SF") (VNx4SF "VNx4SF")
|
||||
(define_mode_attr V_FP_EQUIV [(VNx8HI "VNx8HF") (VNx8HF "VNx8HF")
|
||||
(VNx4SI "VNx4SF") (VNx4SF "VNx4SF")
|
||||
(VNx2DI "VNx2DF") (VNx2DF "VNx2DF")])
|
||||
(define_mode_attr v_fp_equiv [(VNx4SI "vnx4sf") (VNx4SF "vnx4sf")
|
||||
(define_mode_attr v_fp_equiv [(VNx8HI "vnx8hf") (VNx8HF "vnx8hf")
|
||||
(VNx4SI "vnx4sf") (VNx4SF "vnx4sf")
|
||||
(VNx2DI "vnx2df") (VNx2DF "vnx2df")])
|
||||
|
||||
;; Mode for vector conditional operations where the comparison has
|
||||
|
@ -1,3 +1,8 @@
|
||||
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* gcc.target/aarch64/sve/vcond_17.c: New test.
|
||||
* gcc.target/aarch64/sve/vcond_17_run.c: Likewise.
|
||||
|
||||
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* gcc.target/aarch64/sve/spill_4.c: Expect all ptrues to be .Bs.
|
||||
|
94
gcc/testsuite/gcc.target/aarch64/sve/vcond_17.c
Normal file
94
gcc/testsuite/gcc.target/aarch64/sve/vcond_17.c
Normal file
@ -0,0 +1,94 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define eq(A, B) ((A) == (B))
|
||||
#define ne(A, B) ((A) != (B))
|
||||
#define olt(A, B) ((A) < (B))
|
||||
#define ole(A, B) ((A) <= (B))
|
||||
#define oge(A, B) ((A) >= (B))
|
||||
#define ogt(A, B) ((A) > (B))
|
||||
#define ordered(A, B) (!__builtin_isunordered (A, B))
|
||||
#define unordered(A, B) (__builtin_isunordered (A, B))
|
||||
#define ueq(A, B) (!__builtin_islessgreater (A, B))
|
||||
#define ult(A, B) (__builtin_isless (A, B))
|
||||
#define ule(A, B) (__builtin_islessequal (A, B))
|
||||
#define uge(A, B) (__builtin_isgreaterequal (A, B))
|
||||
#define ugt(A, B) (__builtin_isgreater (A, B))
|
||||
#define nueq(A, B) (__builtin_islessgreater (A, B))
|
||||
#define nult(A, B) (!__builtin_isless (A, B))
|
||||
#define nule(A, B) (!__builtin_islessequal (A, B))
|
||||
#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
|
||||
#define nugt(A, B) (!__builtin_isgreater (A, B))
|
||||
|
||||
#define DEF_LOOP(CMP, EXPECT_INVALID) \
|
||||
void __attribute__ ((noinline, noclone)) \
|
||||
test_##CMP##_var (__fp16 *restrict dest, __fp16 *restrict src, \
|
||||
__fp16 fallback, __fp16 *restrict a, \
|
||||
__fp16 *restrict b, int count) \
|
||||
{ \
|
||||
for (int i = 0; i < count; ++i) \
|
||||
dest[i] = CMP (a[i], b[i]) ? src[i] : fallback; \
|
||||
} \
|
||||
\
|
||||
void __attribute__ ((noinline, noclone)) \
|
||||
test_##CMP##_zero (__fp16 *restrict dest, __fp16 *restrict src, \
|
||||
__fp16 fallback, __fp16 *restrict a, \
|
||||
int count) \
|
||||
{ \
|
||||
for (int i = 0; i < count; ++i) \
|
||||
dest[i] = CMP (a[i], (__fp16) 0) ? src[i] : fallback; \
|
||||
} \
|
||||
\
|
||||
void __attribute__ ((noinline, noclone)) \
|
||||
test_##CMP##_sel (__fp16 *restrict dest, __fp16 if_true, \
|
||||
__fp16 if_false, __fp16 *restrict a, \
|
||||
__fp16 b, int count) \
|
||||
{ \
|
||||
for (int i = 0; i < count; ++i) \
|
||||
dest[i] = CMP (a[i], b) ? if_true : if_false; \
|
||||
}
|
||||
|
||||
#define TEST_ALL(T) \
|
||||
T (eq, 0) \
|
||||
T (ne, 0) \
|
||||
T (olt, 1) \
|
||||
T (ole, 1) \
|
||||
T (oge, 1) \
|
||||
T (ogt, 1) \
|
||||
T (ordered, 0) \
|
||||
T (unordered, 0) \
|
||||
T (ueq, 0) \
|
||||
T (ult, 0) \
|
||||
T (ule, 0) \
|
||||
T (uge, 0) \
|
||||
T (ugt, 0) \
|
||||
T (nueq, 0) \
|
||||
T (nult, 0) \
|
||||
T (nule, 0) \
|
||||
T (nuge, 0) \
|
||||
T (nugt, 0)
|
||||
|
||||
TEST_ALL (DEF_LOOP)
|
||||
|
||||
/* { dg-final { scan-assembler {\tfcmeq\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-assembler {\tfcmeq\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
|
||||
|
||||
/* { dg-final { scan-assembler {\tfcmne\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
|
||||
/* { dg-final { scan-assembler {\tfcmne\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
|
||||
|
||||
/* { dg-final { scan-assembler {\tfcmlt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
|
||||
/* { dg-final { scan-assembler {\tfcmlt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
|
||||
|
||||
/* { dg-final { scan-assembler {\tfcmle\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
|
||||
/* { dg-final { scan-assembler {\tfcmle\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
|
||||
|
||||
/* { dg-final { scan-assembler {\tfcmgt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
|
||||
/* { dg-final { scan-assembler {\tfcmgt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
|
||||
|
||||
/* { dg-final { scan-assembler {\tfcmge\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
|
||||
/* { dg-final { scan-assembler {\tfcmge\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
|
||||
|
||||
/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
|
||||
/* { dg-final { scan-assembler {\tfcmuo\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
|
54
gcc/testsuite/gcc.target/aarch64/sve/vcond_17_run.c
Normal file
54
gcc/testsuite/gcc.target/aarch64/sve/vcond_17_run.c
Normal file
@ -0,0 +1,54 @@
|
||||
/* { dg-do run { target aarch64_sve_hw } } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
/* { dg-require-effective-target fenv_exceptions } */
|
||||
|
||||
#include <fenv.h>
|
||||
|
||||
#include "vcond_17.c"
|
||||
|
||||
#define N 401
|
||||
|
||||
#define TEST_LOOP(CMP, EXPECT_INVALID) \
|
||||
{ \
|
||||
__fp16 dest1[N], dest2[N], dest3[N], src[N]; \
|
||||
__fp16 a[N], b[N]; \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
{ \
|
||||
src[i] = i * i; \
|
||||
if (i % 5 == 0) \
|
||||
a[i] = 0; \
|
||||
else if (i % 3) \
|
||||
a[i] = i * 0.1; \
|
||||
else \
|
||||
a[i] = i; \
|
||||
if (i % 7 == 0) \
|
||||
b[i] = __builtin_nan (""); \
|
||||
else if (i % 6) \
|
||||
b[i] = i * 0.1; \
|
||||
else \
|
||||
b[i] = i; \
|
||||
asm volatile ("" ::: "memory"); \
|
||||
} \
|
||||
feclearexcept (FE_ALL_EXCEPT); \
|
||||
test_##CMP##_var (dest1, src, 11, a, b, N); \
|
||||
test_##CMP##_zero (dest2, src, 22, a, N); \
|
||||
test_##CMP##_sel (dest3, 33, 44, a, 9, N); \
|
||||
if (!fetestexcept (FE_INVALID) != !(EXPECT_INVALID)) \
|
||||
__builtin_abort (); \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
{ \
|
||||
if (dest1[i] != (CMP (a[i], b[i]) ? src[i] : 11)) \
|
||||
__builtin_abort (); \
|
||||
if (dest2[i] != (CMP (a[i], 0) ? src[i] : 22)) \
|
||||
__builtin_abort (); \
|
||||
if (dest3[i] != (CMP (a[i], 9) ? 33 : 44)) \
|
||||
__builtin_abort (); \
|
||||
} \
|
||||
}
|
||||
|
||||
int __attribute__ ((optimize (1)))
|
||||
main (void)
|
||||
{
|
||||
TEST_ALL (TEST_LOOP)
|
||||
return 0;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user