mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-02-26 20:56:16 +08:00
[AArch64] PR tree-optimization/90332: Implement vec_init<M><N> where N is a vector mode
This patch fixes the failing gcc.dg/vect/slp-reduc-sad-2.c testcase on aarch64 by implementing a vec_init optab that can handle two half-width vectors producing a full-width one by concatenating them. In the gcc.dg/vect/slp-reduc-sad-2.c case it's a V8QI reg concatenated with a V8QI const_vector of zeroes. This can be implemented efficiently using the aarch64_combinez pattern that just loads a D-register to make use of the implicit zero-extending semantics of that load. Otherwise it concatenates the two vectors using aarch64_simd_combine. With this patch I'm seeing the effect from richi's original patch that added gcc.dg/vect/slp-reduc-sad-2.c on aarch64 and 525.x264_r improves by about 1.5%. PR tree-optimization/90332 * config/aarch64/aarch64.c (aarch64_expand_vector_init): Handle VALS containing two vectors. * config/aarch64/aarch64-simd.md (*aarch64_combinez<mode>): Rename to... (@aarch64_combinez<mode>): ... This. (*aarch64_combinez_be<mode>): Rename to... (@aarch64_combinez_be<mode>): ... This. (vec_init<mode><Vhalf>): New define_expand. * config/aarch64/iterators.md (Vhalf): Handle V8HF. From-SVN: r272002
This commit is contained in:
parent
a2dbc0bf2a
commit
41dab855dc
@ -1,3 +1,16 @@
|
||||
2019-06-06 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
PR tree-optimization/90332
|
||||
* config/aarch64/aarch64.c (aarch64_expand_vector_init):
|
||||
Handle VALS containing two vectors.
|
||||
* config/aarch64/aarch64-simd.md (*aarch64_combinez<mode>): Rename
|
||||
to...
|
||||
(@aarch64_combinez<mode>): ... This.
|
||||
(*aarch64_combinez_be<mode>): Rename to...
|
||||
(@aarch64_combinez_be<mode>): ... This.
|
||||
(vec_init<mode><Vhalf>): New define_expand.
|
||||
* config/aarch64/iterators.md (Vhalf): Handle V8HF.
|
||||
|
||||
2019-06-06 Jozef Lawrynowicz <jozef.l@mittosystems.com>
|
||||
|
||||
* config/msp430/msp430.md (ashlhi3): Use the const_variant of shift
|
||||
|
@ -3216,7 +3216,7 @@
|
||||
;; In this insn, operand 1 should be low, and operand 2 the high part of the
|
||||
;; dest vector.
|
||||
|
||||
(define_insn "*aarch64_combinez<mode>"
|
||||
(define_insn "@aarch64_combinez<mode>"
|
||||
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
|
||||
(vec_concat:<VDBL>
|
||||
(match_operand:VDC 1 "general_operand" "w,?r,m")
|
||||
@ -3230,7 +3230,7 @@
|
||||
(set_attr "arch" "simd,fp,simd")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_combinez_be<mode>"
|
||||
(define_insn "@aarch64_combinez_be<mode>"
|
||||
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
|
||||
(vec_concat:<VDBL>
|
||||
(match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
|
||||
@ -5959,6 +5959,15 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_init<mode><Vhalf>"
|
||||
[(match_operand:VQ_NO2E 0 "register_operand" "")
|
||||
(match_operand 1 "" "")]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
aarch64_expand_vector_init (operands[0], operands[1]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "*aarch64_simd_ld1r<mode>"
|
||||
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
|
||||
(vec_duplicate:VALL_F16
|
||||
|
@ -15106,6 +15106,45 @@ aarch64_expand_vector_init (rtx target, rtx vals)
|
||||
rtx v0 = XVECEXP (vals, 0, 0);
|
||||
bool all_same = true;
|
||||
|
||||
/* This is a special vec_init<M><N> where N is not an element mode but a
|
||||
vector mode with half the elements of M. We expect to find two entries
|
||||
of mode N in VALS and we must put their concatenation into TARGET. */
|
||||
if (XVECLEN (vals, 0) == 2 && VECTOR_MODE_P (GET_MODE (XVECEXP (vals, 0, 0))))
|
||||
{
|
||||
gcc_assert (known_eq (GET_MODE_SIZE (mode),
|
||||
2 * GET_MODE_SIZE (GET_MODE (XVECEXP (vals, 0, 0)))));
|
||||
rtx lo = XVECEXP (vals, 0, 0);
|
||||
rtx hi = XVECEXP (vals, 0, 1);
|
||||
machine_mode narrow_mode = GET_MODE (lo);
|
||||
gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode);
|
||||
gcc_assert (narrow_mode == GET_MODE (hi));
|
||||
|
||||
/* When we want to concatenate a half-width vector with zeroes we can
|
||||
use the aarch64_combinez[_be] patterns. Just make sure that the
|
||||
zeroes are in the right half. */
|
||||
if (BYTES_BIG_ENDIAN
|
||||
&& aarch64_simd_imm_zero (lo, narrow_mode)
|
||||
&& general_operand (hi, narrow_mode))
|
||||
emit_insn (gen_aarch64_combinez_be (narrow_mode, target, hi, lo));
|
||||
else if (!BYTES_BIG_ENDIAN
|
||||
&& aarch64_simd_imm_zero (hi, narrow_mode)
|
||||
&& general_operand (lo, narrow_mode))
|
||||
emit_insn (gen_aarch64_combinez (narrow_mode, target, lo, hi));
|
||||
else
|
||||
{
|
||||
/* Else create the two half-width registers and combine them. */
|
||||
if (!REG_P (lo))
|
||||
lo = force_reg (GET_MODE (lo), lo);
|
||||
if (!REG_P (hi))
|
||||
hi = force_reg (GET_MODE (hi), hi);
|
||||
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
std::swap (lo, hi);
|
||||
emit_insn (gen_aarch64_simd_combine (narrow_mode, target, lo, hi));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Count the number of variable elements to initialise. */
|
||||
for (int i = 0; i < n_elts; ++i)
|
||||
{
|
||||
|
@ -768,6 +768,7 @@
|
||||
;; Half modes of all vector modes, in lower-case.
|
||||
(define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi")
|
||||
(V4HI "v2hi") (V8HI "v4hi")
|
||||
(V8HF "v4hf")
|
||||
(V2SI "si") (V4SI "v2si")
|
||||
(V2DI "di") (V2SF "sf")
|
||||
(V4SF "v2sf") (V2DF "df")])
|
||||
|
Loading…
Reference in New Issue
Block a user