tree-optimization/92819 restrict new vector CTOR canonicalization

The PR shows that code generation ends up pessimized by the new
canonicalization rules, which nail don't-care elements
to specific values, making it hard to generate good code later.

The temporary solution is to avoid the canonicalization in the cases
where we obviously know it will create more GIMPLE stmts than
before.

2020-02-04  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92819
	* tree-ssa-forwprop.c (simplify_vector_constructor): Avoid
	generating more stmts than before.

	* gcc.target/i386/pr92819.c: New testcase.
	* gcc.target/i386/pr92803.c: Adjust.
This commit is contained in:
Richard Biener 2020-02-04 10:03:03 +01:00
parent 9a4d502421
commit fc98d03861
5 changed files with 73 additions and 4 deletions

View File

@ -1,3 +1,9 @@
2020-02-04 Richard Biener <rguenther@suse.de>
PR tree-optimization/92819
* tree-ssa-forwprop.c (simplify_vector_constructor): Avoid
generating more stmts than before.
2020-02-04 Martin Liska <mliska@suse.cz>
* config/arm/arm.c (arm_gen_far_branch): Move the function

View File

@ -1,3 +1,9 @@
2020-02-04 Richard Biener <rguenther@suse.de>
PR tree-optimization/92819
* gcc.target/i386/pr92819.c: New testcase.
* gcc.target/i386/pr92803.c: Adjust.
2020-02-03 Iain Sandoe <iain@sandoe.co.uk>
PR c++/93458

View File

@ -36,5 +36,6 @@ barf (v8sf x)
/* We expect all other CTORs to turn into permutes, the FP converting ones
to two each with the one with constants possibly elided in the future
by converting 3.0f and 1.0f "back" to integers. */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 4 "forwprop1" } } */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "forwprop1" { xfail *-*-* } } } */
/* For foo we do nothing. */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "forwprop1" } } */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "forwprop1" { xfail *-*-* } } } */

View File

@ -0,0 +1,45 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx2 -fdump-tree-forwprop1" } */
typedef double v4df __attribute__((vector_size (32)));
typedef double v2df __attribute__((vector_size (16)));
typedef short v16hi __attribute__((vector_size (32)));
typedef short v8hi __attribute__((vector_size (16)));
/* Build a two-element double vector whose element 0 is lane 1 of the
   four-element vector X and whose element 1 is the double loaded
   through P.  This function carries no expected-code annotation; since
   the tree-dump counts checked at the end of this file are already
   accounted for by the annotated functions, it is presumably expected
   to produce neither a BIT_INSERT_EXPR nor a VEC_PERM_EXPR.  */
v2df
foo (v4df x, double *p)
{
return (v2df) { x[1], *p };
}
/* Build a two-element double vector whose element 0 is lane 0 of the
   four-element vector X and whose element 1 is the double loaded
   through P.  The extracted lane keeps its position (0 -> 0); the
   inline annotation records that a BIT_INSERT_EXPR is expected.  */
v2df
bar (v4df x, double *p)
{
return (v2df) { x[0], *p }; /* BIT_INSERT_EXPR */
}
/* Build a two-element double vector whose element 0 is lane 1 of the
   two-element vector X and whose element 1 is the double loaded
   through P.  Source and result vectors have the same element count
   here; the inline annotation records that a VEC_PERM_EXPR is
   expected.  */
v2df
baz (v2df x, double *p)
{
return (v2df) { x[1], *p }; /* VEC_PERM_EXPR */
}
/* Build a two-element double vector whose element 0 is lane 0 of the
   two-element vector X and whose element 1 is the double loaded
   through P.  The extracted lane keeps its position (0 -> 0); the
   inline annotation records that a BIT_INSERT_EXPR is expected.  */
v2df
qux (v2df x, double *p)
{
return (v2df) { x[0], *p }; /* BIT_INSERT_EXPR */
}
/* Build a two-element double vector whose element 0 is lane 3 (the
   last lane) of the four-element vector X and whose element 1 is the
   double loaded through P.  This function carries no expected-code
   annotation; since the tree-dump counts checked at the end of this
   file are already accounted for by the annotated functions, it is
   presumably expected to produce neither a BIT_INSERT_EXPR nor a
   VEC_PERM_EXPR.  */
v2df
corge (v4df x, double *p)
{
return (v2df) { x[3], *p };
}
/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "forwprop1" } } */
/* We can't check for 1:1 assembler here so check for what we do not
want to see. */
/* { dg-final { scan-assembler-not { "perm" } } } */
/* { dg-final { scan-assembler-not { "insert" } } } */
/* { dg-final { scan-assembler-not { "broadcast" } } } */

View File

@ -2230,7 +2230,6 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
unsigned HOST_WIDE_INT refnelts;
enum tree_code conv_code;
constructor_elt *elt;
bool maybe_ident;
op = gimple_assign_rhs1 (stmt);
type = TREE_TYPE (op);
@ -2245,7 +2244,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
orig[0] = NULL;
orig[1] = NULL;
conv_code = ERROR_MARK;
maybe_ident = true;
bool maybe_ident = true;
bool maybe_blend[2] = { true, true };
tree one_constant = NULL_TREE;
tree one_nonconstant = NULL_TREE;
auto_vec<tree> constants;
@ -2290,6 +2290,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
orig[j] = ref;
if (elem != i || j != 0)
maybe_ident = false;
if (elem != i)
maybe_blend[j] = false;
elts.safe_push (std::make_pair (j, elem));
continue;
}
@ -2439,6 +2441,15 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
}
else
{
/* If we combine a vector with a non-vector, avoid cases where
we'll obviously end up with more GIMPLE stmts; that is the case when
we will not later fold this to a single insert into the vector
and we had a single extract originally. See PR92819. */
if (nelts == 2
&& refnelts > 2
&& orig[1] == error_mark_node
&& !maybe_blend[0])
return false;
tree mask_type, perm_type, conv_src_type;
perm_type = TREE_TYPE (orig[0]);
conv_src_type = (nelts == refnelts