re PR tree-optimization/92819 (Worse code generated on avx2 due to simplify_vector_constructor)

2019-12-06  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92819
	* match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts
	into the last lane.  For two-element vectors try inserting
	into the last lane when inserting into the first fails.

	* gcc.target/i386/pr92819-1.c: New testcase.
	* gcc.target/i386/pr92803.c: Adjust.

From-SVN: r279033
This commit is contained in:
Richard Biener 2019-12-06 07:53:15 +00:00 committed by Richard Biener
parent 9961856c3a
commit 2ef278569f
5 changed files with 48 additions and 6 deletions

View File

@ -1,3 +1,10 @@
2019-12-06 Richard Biener <rguenther@suse.de>
PR tree-optimization/92819
* match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts
into the last lane. For two-element vectors try inserting
into the last lane when inserting into the first fails.
2019-12-06 Jakub Jelinek <jakub@redhat.com>
* common.opt (fprofile-partial-training): Terminate description with

View File

@ -6032,7 +6032,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| TREE_CODE (cop1) == VECTOR_CST
|| TREE_CODE (cop1) == CONSTRUCTOR))
{
if (sel.series_p (1, 1, nelts + 1, 1))
bool insert_first_p = sel.series_p (1, 1, nelts + 1, 1);
if (insert_first_p)
{
/* After canonicalizing the first elt to come from the
first vector we only can insert the first elt from
@ -6041,13 +6042,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
if ((ins = fold_read_from_vector (cop0, sel[0])))
op0 = op1;
}
else
/* The above can fail for two-element vectors which always
appear to insert the first element, so try inserting
into the second lane as well. For more than two
elements that's wasted time. */
if (!insert_first_p || (!ins && maybe_eq (nelts, 2u)))
{
unsigned int encoded_nelts = sel.encoding ().encoded_nelts ();
for (at = 0; at < encoded_nelts; ++at)
if (maybe_ne (sel[at], at))
break;
if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1))
if (at < encoded_nelts
&& (known_eq (at + 1, nelts)
|| sel.series_p (at + 1, 1, at + 1, 1)))
{
if (known_lt (poly_uint64 (sel[at]), nelts))
ins = fold_read_from_vector (cop0, sel[at]);

View File

@ -1,3 +1,9 @@
2019-12-06 Richard Biener <rguenther@suse.de>
PR tree-optimization/92819
* gcc.target/i386/pr92819-1.c: New testcase.
* gcc.target/i386/pr92803.c: Adjust.
2019-12-05 Martin Sebor <msebor@redhat.com>
PR testsuite/92829

View File

@ -31,8 +31,10 @@ barf (v8sf x)
return (v4sf) { x[4], x[5], 1.0f, 2.0f };
}
/* We expect all CTORs to turn into permutes, the FP converting ones
/* For bar we do two inserts, first zero, then convert, then insert *p.  */
/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
/* We expect all other CTORs to turn into permutes, the FP converting ones
to two each with the one with constants possibly elided in the future
by converting 3.0f and 1.0f "back" to integers. */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 6 "forwprop1" } } */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 5 "forwprop1" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 4 "forwprop1" } } */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "forwprop1" { xfail *-*-* } } } */

View File

@ -0,0 +1,20 @@
/* { dg-do compile } */
/* { dg-options "-O -msse2 -fdump-tree-forwprop1" } */
typedef double v2df __attribute__((vector_size (16)));
/* Build a v2df whose lane 0 is x[0] and whose lane 1 is loaded from *p,
   i.e. X with only its last lane replaced.  The constructor form is kept
   deliberately, since it is what the optimizer is expected to recognize.
   NOTE(review): presumably this accounts for one of the two
   BIT_INSERT_EXPRs and for the "movhpd" the dg-final directives scan
   for -- confirm against the forwprop1 dump.  */
v2df
foo (v2df x, double *p)
{
return (v2df) { x[0], *p };
}
/* Build a v2df whose lane 0 is loaded from *p and whose lane 1 is x[1],
   i.e. X with only its first lane replaced.  The constructor form is kept
   deliberately, since it is what the optimizer is expected to recognize.
   NOTE(review): presumably this accounts for one of the two
   BIT_INSERT_EXPRs and for the "movlpd" the dg-final directives scan
   for -- confirm against the forwprop1 dump.  */
v2df
bar (v2df x, double *p)
{
return (v2df) { *p, x[1] };
}
/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
/* { dg-final { scan-assembler "movhpd" } } */
/* { dg-final { scan-assembler "movlpd" } } */