mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-21 12:41:19 +08:00
re PR tree-optimization/92819 (Worse code generated on avx2 due to simplify_vector_constructor)
2019-12-06 Richard Biener <rguenther@suse.de> PR tree-optimization/92819 * match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts into the last lane. For two-element vectors try inserting into the last lane when inserting into the first fails. * gcc.target/i386/pr92819-1.c: New testcase. * gcc.target/i386/pr92803.c: Adjust. From-SVN: r279033
This commit is contained in:
parent
9961856c3a
commit
2ef278569f
@ -1,3 +1,10 @@
|
||||
2019-12-06 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/92819
|
||||
* match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts
|
||||
into the last lane. For two-element vectors try inserting
|
||||
into the last lane when inserting into the first fails.
|
||||
|
||||
2019-12-06 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* common.opt (fprofile-partial-training): Terminate description with
|
||||
|
13
gcc/match.pd
13
gcc/match.pd
@ -6032,7 +6032,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
|| TREE_CODE (cop1) == VECTOR_CST
|
||||
|| TREE_CODE (cop1) == CONSTRUCTOR))
|
||||
{
|
||||
if (sel.series_p (1, 1, nelts + 1, 1))
|
||||
bool insert_first_p = sel.series_p (1, 1, nelts + 1, 1);
|
||||
if (insert_first_p)
|
||||
{
|
||||
/* After canonicalizing the first elt to come from the
|
||||
first vector we only can insert the first elt from
|
||||
@ -6041,13 +6042,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
if ((ins = fold_read_from_vector (cop0, sel[0])))
|
||||
op0 = op1;
|
||||
}
|
||||
else
|
||||
/* The above can fail for two-element vectors which always
|
||||
appear to insert the first element, so try inserting
|
||||
into the second lane as well. For more than two
|
||||
elements that's wasted time. */
|
||||
if (!insert_first_p || (!ins && maybe_eq (nelts, 2u)))
|
||||
{
|
||||
unsigned int encoded_nelts = sel.encoding ().encoded_nelts ();
|
||||
for (at = 0; at < encoded_nelts; ++at)
|
||||
if (maybe_ne (sel[at], at))
|
||||
break;
|
||||
if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1))
|
||||
if (at < encoded_nelts
|
||||
&& (known_eq (at + 1, nelts)
|
||||
|| sel.series_p (at + 1, 1, at + 1, 1)))
|
||||
{
|
||||
if (known_lt (poly_uint64 (sel[at]), nelts))
|
||||
ins = fold_read_from_vector (cop0, sel[at]);
|
||||
|
@ -1,3 +1,9 @@
|
||||
2019-12-06 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/92819
|
||||
* gcc.target/i386/pr92819-1.c: New testcase.
|
||||
* gcc.target/i386/pr92803.c: Adjust.
|
||||
|
||||
2019-12-05 Martin Sebor <msebor@redhat.com>
|
||||
|
||||
PR testsuite/92829
|
||||
|
@ -31,8 +31,10 @@ barf (v8sf x)
|
||||
return (v4sf) { x[4], x[5], 1.0f, 2.0f };
|
||||
}
|
||||
|
||||
/* We expect all CTORs to turn into permutes, the FP converting ones
|
||||
/* For bar we do two inserts, first zero, then convert, then insert *p.  */
|
||||
/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
|
||||
/* We expect all other CTORs to turn into permutes, the FP converting ones
|
||||
to two each with the one with constants possibly elided in the future
|
||||
by converting 3.0f and 1.0f "back" to integers. */
|
||||
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 6 "forwprop1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 5 "forwprop1" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 4 "forwprop1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "forwprop1" { xfail *-*-* } } } */
|
||||
|
20
gcc/testsuite/gcc.target/i386/pr92819-1.c
Normal file
20
gcc/testsuite/gcc.target/i386/pr92819-1.c
Normal file
@ -0,0 +1,20 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -msse2 -fdump-tree-forwprop1" } */
|
||||
|
||||
typedef double v2df __attribute__((vector_size (16)));
|
||||
|
||||
v2df
|
||||
foo (v2df x, double *p)
|
||||
{
|
||||
return (v2df) { x[0], *p };
|
||||
}
|
||||
|
||||
v2df
|
||||
bar (v2df x, double *p)
|
||||
{
|
||||
return (v2df) { *p, x[1] };
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
|
||||
/* { dg-final { scan-assembler "movhpd" } } */
|
||||
/* { dg-final { scan-assembler "movlpd" } } */
|
Loading…
x
Reference in New Issue
Block a user