mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-22 00:01:10 +08:00
[AArch64] Use SVE MOV /M of scalars
This patch uses MOV /M to optimise selects between a duplicated scalar variable and a vector. 2019-08-14 Richard Sandiford <richard.sandiford@arm.com> Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> gcc/ * config/aarch64/aarch64-sve.md (*aarch64_sel_dup<mode>): New pattern. gcc/testsuite/ * g++.target/aarch64/sve/dup_sel_1.C: New test. * g++.target/aarch64/sve/dup_sel_2.C: Likewise. * g++.target/aarch64/sve/dup_sel_3.C: Likewise. * g++.target/aarch64/sve/dup_sel_4.C: Likewise. * g++.target/aarch64/sve/dup_sel_5.C: Likewise. * g++.target/aarch64/sve/dup_sel_6.C: Likewise. Co-Authored-By: Kugan Vivekanandarajah <kuganv@linaro.org> From-SVN: r274442
This commit is contained in:
parent
d29f7dd50d
commit
88a37c4d72
@ -1,3 +1,8 @@
|
||||
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
|
||||
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||
|
||||
* config/aarch64/aarch64-sve.md (*aarch64_sel_dup<mode>): New pattern.
|
||||
|
||||
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
|
||||
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||
|
||||
|
@ -3070,6 +3070,29 @@
|
||||
[(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
|
||||
)
|
||||
|
||||
;; Optimize selects between a duplicated scalar variable and another vector,
|
||||
;; the latter of which can be a zero constant or a variable. Treat duplicates
|
||||
;; of GPRs as being more expensive than duplicates of FPRs, since they
|
||||
;; involve a cross-file move.
|
||||
;; Operands: 0 is the destination vector, 3 the governing predicate,
;; 1 the scalar that is duplicated and 2 the other vector input (a register
;; or zero).  Every alternative ends with a merging "MOV /M" of operand 1
;; into operand 0 under predicate 3.
;;
;; The six alternatives come in GPR/FPR pairs: odd-numbered alternatives
;; take operand 1 from "r" and print it with %<vwcore>1, even-numbered ones
;; take it from "w" and print it with %<Vetype>1.  Within each pair the GPR
;; form carries one more "?" on operand 0 than the FPR form.
;;
;;   1-2: operand 2 is tied to operand 0 ("0"), so a single MOV /M is
;;        emitted.
;;   3-4: operand 2 is "Dz"; a zeroing MOVPRFX of operand 0 under
;;        predicate 3 precedes the MOV /M.
;;   5-6: operand 2 is a separate "w" register; an unpredicated
;;        MOVPRFX from operand 2 precedes the MOV /M.
;;
;; NOTE(review): alternative 3 is the only MOVPRFX form without "&" on
;; operand 0 -- presumably because its only other register inputs are a
;; GPR and a predicate, which cannot overlap a "w" destination; confirm.
(define_insn "*aarch64_sel_dup<mode>"
|
||||
[(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
|
||||
(unspec:SVE_ALL
|
||||
[(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upl, Upl, Upl, Upl")
|
||||
(vec_duplicate:SVE_ALL
|
||||
(match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
|
||||
(match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
|
||||
UNSPEC_SEL))]
|
||||
"TARGET_SVE"
|
||||
"@
|
||||
mov\t%0.<Vetype>, %3/m, %<vwcore>1
|
||||
mov\t%0.<Vetype>, %3/m, %<Vetype>1
|
||||
movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
|
||||
movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
|
||||
movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
|
||||
movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
|
||||
[(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
|
||||
)
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; ---- [INT,FP] Compare and select
|
||||
;; -------------------------------------------------------------------------
|
||||
|
@ -1,3 +1,13 @@
|
||||
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
|
||||
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||
|
||||
* g++.target/aarch64/sve/dup_sel_1.C: New test.
|
||||
* g++.target/aarch64/sve/dup_sel_2.C: Likewise.
|
||||
* g++.target/aarch64/sve/dup_sel_3.C: Likewise.
|
||||
* g++.target/aarch64/sve/dup_sel_4.C: Likewise.
|
||||
* g++.target/aarch64/sve/dup_sel_5.C: Likewise.
|
||||
* g++.target/aarch64/sve/dup_sel_6.C: Likewise.
|
||||
|
||||
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
|
||||
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||
|
||||
|
21
gcc/testsuite/g++.target/aarch64/sve/dup_sel_1.C
Normal file
21
gcc/testsuite/g++.target/aarch64/sve/dup_sel_1.C
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msve-vector-bits=256" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int32_t vnx4si __attribute__((vector_size(32)));
|
||||
|
||||
/* Select between a duplicated integer scalar and a vector that lives in
   the same register as the result.  Both x and y are pinned to z0, so the
   test's dg-final directives expect a single predicated MOV from a w
   register into z0.s and no MOVPRFX.  */
void
|
||||
foo (int32_t val)
|
||||
{
|
||||
register vnx4si x asm ("z0");
|
||||
register vnx4si y asm ("z0");
|
||||
/* Empty asm with an "=w" output: y gets a value the compiler cannot see.  */
asm volatile ("" : "=w" (y));
|
||||
/* NOTE(review): presumably makes the duplicated scalar a computed value
   rather than the raw incoming argument -- confirm.  */
val += 1;
|
||||
/* Eight copies of val: one per 32-bit lane of the 32-byte vector type.  */
vnx4si z = { val, val, val, val, val, val, val, val };
|
||||
/* Constant lane mask: nonzero lanes take z, zero lanes take y.  */
x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? z : y;
|
||||
/* Empty asm with a "w" input: keeps x live in its register.  */
asm volatile ("" :: "w" (x));
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler {\tmov\tz0\.s, p[0-7]/m, w[0-9]+\n} } } */
|
||||
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
|
20
gcc/testsuite/g++.target/aarch64/sve/dup_sel_2.C
Normal file
20
gcc/testsuite/g++.target/aarch64/sve/dup_sel_2.C
Normal file
@ -0,0 +1,20 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msve-vector-bits=256" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int32_t vnx4si __attribute__((vector_size(32)));
|
||||
|
||||
/* Select between a duplicated integer scalar and a vector held in a
   different register from the result.  x is pinned to z0 and y to z1, so
   the test's dg-final directive expects "movprfx z0, z1" immediately
   followed by a predicated MOV from a w register into z0.s.  */
void
|
||||
foo (int32_t val)
|
||||
{
|
||||
register vnx4si x asm ("z0");
|
||||
register vnx4si y asm ("z1");
|
||||
/* Empty asm with an "=w" output: y gets a value the compiler cannot see.  */
asm volatile ("" : "=w" (y));
|
||||
/* NOTE(review): presumably makes the duplicated scalar a computed value
   rather than the raw incoming argument -- confirm.  */
val += 1;
|
||||
/* Eight copies of val: one per 32-bit lane of the 32-byte vector type.  */
vnx4si z = { val, val, val, val, val, val, val, val };
|
||||
/* Constant lane mask: nonzero lanes take z, zero lanes take y.  */
x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? z : y;
|
||||
/* Empty asm with a "w" input: keeps x live in its register.  */
asm volatile ("" :: "w" (x));
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\tmov\tz0\.s, p[0-7]/m, w[0-9]+\n} } } */
|
21
gcc/testsuite/g++.target/aarch64/sve/dup_sel_3.C
Normal file
21
gcc/testsuite/g++.target/aarch64/sve/dup_sel_3.C
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msve-vector-bits=256" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int32_t vnx4si __attribute__((vector_size(32)));
|
||||
typedef float vnx4sf __attribute__((vector_size(32)));
|
||||
|
||||
/* Floating-point counterpart of the tied-register case: select between a
   duplicated float scalar and a vector that shares z0 with the result.
   The test's dg-final directives expect a single predicated MOV from an
   s register into z0.s and no MOVPRFX.  */
void
|
||||
foo (float val)
|
||||
{
|
||||
register vnx4sf x asm ("z0");
|
||||
register vnx4sf y asm ("z0");
|
||||
/* Empty asm with an "=w" output: y gets a value the compiler cannot see.  */
asm volatile ("" : "=w" (y));
|
||||
/* Eight copies of val: one per 32-bit lane of the 32-byte vector type.  */
vnx4sf z = { val, val, val, val, val, val, val, val };
|
||||
/* The mask is an integer vector with the same lane count as the float
   data; nonzero lanes take z, zero lanes take y.  */
x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? z : y;
|
||||
/* Empty asm with a "w" input: keeps x live in its register.  */
asm volatile ("" :: "w" (x));
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler {\tmov\tz0\.s, p[0-7]/m, s[0-9]+\n} } } */
|
||||
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
|
20
gcc/testsuite/g++.target/aarch64/sve/dup_sel_4.C
Normal file
20
gcc/testsuite/g++.target/aarch64/sve/dup_sel_4.C
Normal file
@ -0,0 +1,20 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msve-vector-bits=256" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int32_t vnx4si __attribute__((vector_size(32)));
|
||||
typedef float vnx4sf __attribute__((vector_size(32)));
|
||||
|
||||
/* Floating-point counterpart of the separate-register case: x is pinned
   to z0 and y to z1, so the test's dg-final directive expects
   "movprfx z0, z1" immediately followed by a predicated MOV from an
   s register into z0.s.  */
void
|
||||
foo (float val)
|
||||
{
|
||||
register vnx4sf x asm ("z0");
|
||||
register vnx4sf y asm ("z1");
|
||||
/* Empty asm with an "=w" output: y gets a value the compiler cannot see.  */
asm volatile ("" : "=w" (y));
|
||||
/* Eight copies of val: one per 32-bit lane of the 32-byte vector type.  */
vnx4sf z = { val, val, val, val, val, val, val, val };
|
||||
/* The mask is an integer vector with the same lane count as the float
   data; nonzero lanes take z, zero lanes take y.  */
x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? z : y;
|
||||
/* Empty asm with a "w" input: keeps x live in its register.  */
asm volatile ("" :: "w" (x));
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\tmov\tz0\.s, p[0-7]/m, s[0-9]+\n} } } */
|
18
gcc/testsuite/g++.target/aarch64/sve/dup_sel_5.C
Normal file
18
gcc/testsuite/g++.target/aarch64/sve/dup_sel_5.C
Normal file
@ -0,0 +1,18 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msve-vector-bits=256" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int32_t vnx4si __attribute__((vector_size(32)));
|
||||
|
||||
/* Select between a duplicated integer scalar and an all-zero vector.
   The test's dg-final directive expects a zeroing MOVPRFX of z0.s under
   the predicate, immediately followed by a predicated MOV from a
   w register into z0.s.  */
void
|
||||
foo (int32_t val)
|
||||
{
|
||||
register vnx4si x asm ("z0");
|
||||
/* NOTE(review): presumably makes the duplicated scalar a computed value
   rather than the raw incoming argument -- confirm.  */
val += 1;
|
||||
/* Eight copies of val: one per 32-bit lane of the 32-byte vector type.  */
vnx4si y = { val, val, val, val, val, val, val, val };
|
||||
/* Nonzero mask lanes take y; the rest take zero ("{ 0 }" leaves the
   remaining elements zero-initialized).  */
x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? y : (vnx4si) { 0 };
|
||||
/* Empty asm with a "w" input: keeps x live in its register.  */
asm volatile ("" :: "w" (x));
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler {\tmovprfx\tz0\.s, p[0-7]/z, z0\.s\n\tmov\tz0\.s, p[0-7]/m, w[0-9]+\n} } } */
|
18
gcc/testsuite/g++.target/aarch64/sve/dup_sel_6.C
Normal file
18
gcc/testsuite/g++.target/aarch64/sve/dup_sel_6.C
Normal file
@ -0,0 +1,18 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msve-vector-bits=256" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int32_t vnx4si __attribute__((vector_size(32)));
|
||||
typedef float vnx4sf __attribute__((vector_size(32)));
|
||||
|
||||
/* Floating-point counterpart of the select-against-zero case.  The test's
   dg-final directive expects a zeroing MOVPRFX of z0.s under the
   predicate, immediately followed by a predicated MOV from an s register
   into z0.s.  */
void
|
||||
foo (float val)
|
||||
{
|
||||
register vnx4sf x asm ("z0");
|
||||
/* Eight copies of val: one per 32-bit lane of the 32-byte vector type.  */
vnx4sf y = { val, val, val, val, val, val, val, val };
|
||||
/* The mask is an integer vector with the same lane count as the float
   data; nonzero lanes take y, the rest take zero.  */
x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? y : (vnx4sf) { 0 };
|
||||
/* Empty asm with a "w" input: keeps x live in its register.  */
asm volatile ("" :: "w" (x));
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler {\tmovprfx\tz0\.s, p[0-7]/z, z0\.s\n\tmov\tz0\.s, p[0-7]/m, s[0-9]+\n} } } */
|
Loading…
x
Reference in New Issue
Block a user