mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-16 07:50:27 +08:00
re PR target/88839 ([SVE] Poor implementation of blend-like permutes)
2019-08-22 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> Richard Sandiford <richard.sandiford@arm.com> PR target/88839 * config/aarch64/aarch64.c (aarch64_evpc_sel): New function. (aarch64_expand_vec_perm_const_1): Call aarch64_evpc_sel. testsuite/ * gcc.target/aarch64/sve/sel_1.c: New test. * gcc.target/aarch64/sve/sel_2.c: Likewise. * gcc.target/aarch64/sve/sel_3.c: Likewise. * gcc.target/aarch64/sve/sel_4.c: Likewise. * gcc.target/aarch64/sve/sel_5.c: Likewise. * gcc.target/aarch64/sve/sel_6.c: Likewise. Co-Authored-By: Richard Sandiford <richard.sandiford@arm.com> From-SVN: r274810
This commit is contained in:
parent
846f78d414
commit
9556ef2016
@ -1,3 +1,10 @@
|
||||
2019-08-22 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
|
||||
Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
PR target/88839
|
||||
* config/aarch64/aarch64.c (aarch64_evpc_sel): New function.
|
||||
(aarch64_expand_vec_perm_const_1): Call aarch64_evpc_sel.
|
||||
|
||||
2019-08-21 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
|
||||
|
||||
PR target/90724
|
||||
|
@ -17975,6 +17975,50 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Try to implement D using SVE SEL instruction. */
|
||||
|
||||
static bool
|
||||
aarch64_evpc_sel (struct expand_vec_perm_d *d)
|
||||
{
|
||||
machine_mode vmode = d->vmode;
|
||||
int unit_size = GET_MODE_UNIT_SIZE (vmode);
|
||||
|
||||
if (d->vec_flags != VEC_SVE_DATA
|
||||
|| unit_size > 8)
|
||||
return false;
|
||||
|
||||
int n_patterns = d->perm.encoding ().npatterns ();
|
||||
poly_int64 vec_len = d->perm.length ();
|
||||
|
||||
for (int i = 0; i < n_patterns; ++i)
|
||||
if (!known_eq (d->perm[i], i)
|
||||
&& !known_eq (d->perm[i], vec_len + i))
|
||||
return false;
|
||||
|
||||
for (int i = n_patterns; i < n_patterns * 2; i++)
|
||||
if (!d->perm.series_p (i, n_patterns, i, n_patterns)
|
||||
&& !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns))
|
||||
return false;
|
||||
|
||||
if (d->testing_p)
|
||||
return true;
|
||||
|
||||
machine_mode pred_mode = aarch64_sve_pred_mode (unit_size).require ();
|
||||
|
||||
rtx_vector_builder builder (pred_mode, n_patterns, 2);
|
||||
for (int i = 0; i < n_patterns * 2; i++)
|
||||
{
|
||||
rtx elem = known_eq (d->perm[i], i) ? CONST1_RTX (BImode)
|
||||
: CONST0_RTX (BImode);
|
||||
builder.quick_push (elem);
|
||||
}
|
||||
|
||||
rtx const_vec = builder.build ();
|
||||
rtx pred = force_reg (pred_mode, const_vec);
|
||||
emit_insn (gen_vcond_mask (vmode, vmode, d->target, d->op1, d->op0, pred));
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
|
||||
{
|
||||
@ -18007,6 +18051,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
else if (aarch64_evpc_trn (d))
|
||||
return true;
|
||||
else if (aarch64_evpc_sel (d))
|
||||
return true;
|
||||
if (d->vec_flags == VEC_SVE_DATA)
|
||||
return aarch64_evpc_sve_tbl (d);
|
||||
else if (d->vec_flags == VEC_ADVSIMD)
|
||||
|
@ -1,3 +1,14 @@
|
||||
2019-08-22 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
|
||||
Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
PR target/88839
|
||||
* gcc.target/aarch64/sve/sel_1.c: New test.
|
||||
* gcc.target/aarch64/sve/sel_2.c: Likewise.
|
||||
* gcc.target/aarch64/sve/sel_3.c: Likewise.
|
||||
* gcc.target/aarch64/sve/sel_4.c: Likewise.
|
||||
* gcc.target/aarch64/sve/sel_5.c: Likewise.
|
||||
* gcc.target/aarch64/sve/sel_6.c: Likewise.
|
||||
|
||||
2019-08-21 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
PR c++/91505
|
||||
|
27
gcc/testsuite/gcc.target/aarch64/sve/sel_1.c
Normal file
27
gcc/testsuite/gcc.target/aarch64/sve/sel_1.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */

/* Blend of two 32-byte vectors of 8-bit elements in which no element
   changes lane.  The directives at the end of the file expect a single
   SEL and no TBL.  */

#include <stdint.h>

typedef int8_t vnx16qi __attribute__((vector_size (32)));

/* Type of the selector vector used for 32-element shuffles.  */
#define INDEX_32 vnx16qi

/* Even lanes come from the first input and odd lanes from the second,
   i.e. the predicate vector is 1 0 1 0 ...  */
#define MASK_32                                 \
  {  0, 33,  2, 35,  4, 37,  6, 39,             \
     8, 41, 10, 43, 12, 45, 14, 47,             \
    16, 49, 18, 51, 20, 53, 22, 55,             \
    24, 57, 26, 59, 28, 61, 30, 63 }

/* Define permute_<VTYPE>, which shuffles its two arguments with the
   constant selector MASK_<N>.  */
#define PERMUTE(VTYPE, N)                                               \
  VTYPE permute_##VTYPE (VTYPE first, VTYPE second)                     \
  {                                                                     \
    return __builtin_shuffle (first, second, (INDEX_##N) MASK_##N);     \
  }

PERMUTE(vnx16qi, 32)

/* { dg-final { scan-assembler-not {\ttbl\t} } } */

/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.h, vl16\n} 1 } } */
|
41
gcc/testsuite/gcc.target/aarch64/sve/sel_2.c
Normal file
41
gcc/testsuite/gcc.target/aarch64/sve/sel_2.c
Normal file
@ -0,0 +1,41 @@
|
||||
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
|
||||
/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int8_t vnx16qi __attribute__((vector_size (32)));
|
||||
typedef int16_t vnx8hi __attribute__((vector_size (32)));
|
||||
typedef int32_t vnx4si __attribute__((vector_size (32)));
|
||||
|
||||
typedef _Float16 vnx8hf __attribute__((vector_size (32)));
|
||||
typedef float vnx4sf __attribute__((vector_size (32)));
|
||||
|
||||
/* Predicate vector: 1 0 0 0 ... */
|
||||
|
||||
#define MASK_32 { 0, 33, 34, 35, 4, 37, 38, 39, 8, 41, 42, 43, 12, \
|
||||
45, 46, 47, 16, 49, 50, 51, 20, 53, 54, 55, 24, \
|
||||
57, 58, 59, 28, 61, 62, 63 }
|
||||
|
||||
/* Predicate vector: 1 0 1 0 ... */
|
||||
|
||||
#define MASK_16 {0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31}
|
||||
|
||||
#define INDEX_32 vnx16qi
|
||||
#define INDEX_16 vnx8hi
|
||||
|
||||
#define PERMUTE(type, nunits) \
|
||||
type permute_##type (type x, type y) \
|
||||
{ \
|
||||
return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \
|
||||
}
|
||||
|
||||
PERMUTE(vnx16qi, 32)
|
||||
PERMUTE(vnx8hi, 16)
|
||||
PERMUTE(vnx8hf, 16)
|
||||
|
||||
/* { dg-final { scan-assembler-not {\ttbl\t} } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s, vl8\n} 3 } } */
|
50
gcc/testsuite/gcc.target/aarch64/sve/sel_3.c
Normal file
50
gcc/testsuite/gcc.target/aarch64/sve/sel_3.c
Normal file
@ -0,0 +1,50 @@
|
||||
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
|
||||
/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int8_t vnx16qi __attribute__((vector_size (32)));
|
||||
typedef int16_t vnx8hi __attribute__((vector_size (32)));
|
||||
typedef int32_t vnx4si __attribute__((vector_size (32)));
|
||||
typedef _Float16 vnx8hf __attribute__((vector_size (32)));
|
||||
typedef float vnx4sf __attribute__((vector_size (32)));
|
||||
|
||||
/* Predicate vector: 1 0 0 0 0 0 0 0 ... */
|
||||
|
||||
#define MASK_32 { 0, 33, 34, 35, 36, 37, 38, 39, \
|
||||
8, 41, 42, 43, 44, 45, 46, 47, \
|
||||
16, 49, 50, 51, 52, 53, 54, 55, \
|
||||
24, 57, 58, 59, 60, 61, 62, 63 }
|
||||
|
||||
/* Predicate vector: 1 0 0 0 ... */
|
||||
|
||||
#define MASK_16 { 0, 17, 18, 19, 4, 21, 22, 23, \
|
||||
8, 25, 26, 27, 12, 29, 30, 31 }
|
||||
|
||||
/* Predicate vector: 1 0 ... */
|
||||
|
||||
#define MASK_8 { 0, 9, 2, 11, 4, 13, 6, 15 }
|
||||
|
||||
#define INDEX_32 vnx16qi
|
||||
#define INDEX_16 vnx8hi
|
||||
#define INDEX_8 vnx4si
|
||||
|
||||
#define PERMUTE(type, nunits) \
|
||||
type permute_##type (type x, type y) \
|
||||
{ \
|
||||
return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \
|
||||
}
|
||||
|
||||
PERMUTE(vnx16qi, 32)
|
||||
PERMUTE(vnx8hi, 16)
|
||||
PERMUTE(vnx4si, 8)
|
||||
PERMUTE(vnx8hf, 16)
|
||||
PERMUTE(vnx4sf, 8)
|
||||
|
||||
/* { dg-final { scan-assembler-not {\ttbl\t} } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d, vl4\n} 5 } } */
|
50
gcc/testsuite/gcc.target/aarch64/sve/sel_4.c
Normal file
50
gcc/testsuite/gcc.target/aarch64/sve/sel_4.c
Normal file
@ -0,0 +1,50 @@
|
||||
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
|
||||
/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int8_t vnx16qi __attribute__((vector_size (32)));
|
||||
typedef int16_t vnx8hi __attribute__((vector_size (32)));
|
||||
typedef int32_t vnx4si __attribute__((vector_size (32)));
|
||||
typedef int64_t vnx2di __attribute__((vector_size (32)));
|
||||
|
||||
typedef _Float16 vnx8hf __attribute__((vector_size (32)));
|
||||
typedef float vnx4sf __attribute__((vector_size (32)));
|
||||
typedef double vnx2df __attribute__((vector_size (32)));
|
||||
|
||||
/* Predicate vector: 1 1 0 0 ... */
|
||||
|
||||
#define MASK_32 { 0, 1, 34, 35, 4, 5, 38, 39, 8, 9, 42, 43, 12, 13, \
|
||||
46, 47, 16, 17, 50, 51, 20, 21, 54, 55, 24, 25, \
|
||||
58, 59, 28, 29, 62, 63 }
|
||||
|
||||
#define MASK_16 {0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31}
|
||||
#define MASK_8 {0, 1, 10, 11, 4, 5, 14, 15}
|
||||
#define MASK_4 {0, 1, 6, 7}
|
||||
|
||||
#define INDEX_32 vnx16qi
|
||||
#define INDEX_16 vnx8hi
|
||||
#define INDEX_8 vnx4si
|
||||
#define INDEX_4 vnx2di
|
||||
|
||||
#define PERMUTE(type, nunits) \
|
||||
type permute_##type (type x, type y) \
|
||||
{ \
|
||||
return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \
|
||||
}
|
||||
|
||||
PERMUTE(vnx16qi, 32)
|
||||
PERMUTE(vnx8hi, 16)
|
||||
PERMUTE(vnx4si, 8)
|
||||
PERMUTE(vnx2di, 4)
|
||||
|
||||
PERMUTE(vnx8hf, 16)
|
||||
PERMUTE(vnx4sf, 8)
|
||||
PERMUTE(vnx2df, 4)
|
||||
|
||||
/* { dg-final { scan-assembler-not {\ttbl\t} } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
|
50
gcc/testsuite/gcc.target/aarch64/sve/sel_5.c
Normal file
50
gcc/testsuite/gcc.target/aarch64/sve/sel_5.c
Normal file
@ -0,0 +1,50 @@
|
||||
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
|
||||
/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int8_t vnx16qi __attribute__((vector_size (32)));
|
||||
typedef int16_t vnx8hi __attribute__((vector_size (32)));
|
||||
typedef int32_t vnx4si __attribute__((vector_size (32)));
|
||||
typedef int64_t vnx2di __attribute__((vector_size (32)));
|
||||
|
||||
typedef _Float16 vnx8hf __attribute__((vector_size (32)));
|
||||
typedef float vnx4sf __attribute__((vector_size (32)));
|
||||
typedef double vnx2df __attribute__((vector_size (32)));
|
||||
|
||||
/* Predicate vector: 1 0 0 1 ... */
|
||||
|
||||
#define MASK_32 { 0, 33, 34, 3, 4, 37, 38, 7, 8, 41, 42, 11, 12, 45, 46, \
|
||||
15, 16, 49, 50, 19, 20, 53, 54, 23, 24, 57, 58, 27, 28, \
|
||||
61, 62, 31 }
|
||||
|
||||
#define MASK_16 {0, 17, 18, 3, 4, 21, 22, 7, 8, 25, 26, 11, 12, 29, 30, 15}
|
||||
#define MASK_8 {0, 9, 10, 3, 4, 13, 14, 7}
|
||||
#define MASK_4 {0, 5, 6, 3}
|
||||
|
||||
#define INDEX_32 vnx16qi
|
||||
#define INDEX_16 vnx8hi
|
||||
#define INDEX_8 vnx4si
|
||||
#define INDEX_4 vnx2di
|
||||
|
||||
#define PERMUTE(type, nunits) \
|
||||
type permute_##type (type x, type y) \
|
||||
{ \
|
||||
return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \
|
||||
}
|
||||
|
||||
PERMUTE(vnx16qi, 32)
|
||||
PERMUTE(vnx8hi, 16)
|
||||
PERMUTE(vnx4si, 8)
|
||||
PERMUTE(vnx2di, 4)
|
||||
|
||||
PERMUTE(vnx8hf, 16)
|
||||
PERMUTE(vnx4sf, 8)
|
||||
PERMUTE(vnx2df, 4)
|
||||
|
||||
/* { dg-final { scan-assembler-not {\ttbl\t} } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
|
42
gcc/testsuite/gcc.target/aarch64/sve/sel_6.c
Normal file
42
gcc/testsuite/gcc.target/aarch64/sve/sel_6.c
Normal file
@ -0,0 +1,42 @@
|
||||
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */

/* Lane-preserving blends of 32-byte vectors with 32-bit and 64-bit
   elements.  The directives at the end of the file expect one SEL per
   function and no TBL.  */

#include <stdint.h>

typedef int32_t vnx4si __attribute__((vector_size (32)));
typedef int64_t vnx2di __attribute__((vector_size (32)));

typedef float vnx4sf __attribute__((vector_size (32)));
typedef double vnx2df __attribute__((vector_size (32)));

/* Selector vector types, keyed by the number of elements.  */
#define INDEX_8 vnx4si
#define INDEX_4 vnx2di

/* In every selector below, the first lane of each group of four comes
   from the first input and the other three from the second, i.e. the
   predicate vector is 1 0 0 0 ...  Only MASK_8 and MASK_4 are used by
   the functions defined in this file.  */
#define MASK_32                                 \
  {  0, 33, 34, 35,  4, 37, 38, 39,             \
     8, 41, 42, 43, 12, 45, 46, 47,             \
    16, 49, 50, 51, 20, 53, 54, 55,             \
    24, 57, 58, 59, 28, 61, 62, 63 }

#define MASK_16                                 \
  {  0, 17, 18, 19,  4, 21, 22, 23,             \
     8, 25, 26, 27, 12, 29, 30, 31 }

#define MASK_8 { 0, 9, 10, 11, 4, 13, 14, 15 }

#define MASK_4 { 0, 5, 6, 7 }

/* Define permute_<VTYPE>, which shuffles its two arguments with the
   constant selector MASK_<N>.  */
#define PERMUTE(VTYPE, N)                                               \
  VTYPE permute_##VTYPE (VTYPE first, VTYPE second)                     \
  {                                                                     \
    return __builtin_shuffle (first, second, (INDEX_##N) MASK_##N);     \
  }

PERMUTE(vnx4si, 8)
PERMUTE(vnx2di, 4)

PERMUTE(vnx4sf, 8)
PERMUTE(vnx2df, 4)

/* { dg-final { scan-assembler-not {\ttbl\t} } } */

/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */

/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d, vl4\n} 2 } } */
|
Loading…
x
Reference in New Issue
Block a user