re PR fortran/62283 (basic-block vectorization fails)

2015-04-28  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/62283
	* tree-vect-slp.c (vect_build_slp_tree): When the SLP build
	fails fatally and we are vectorizing a basic-block simply
	cause the child to be constructed piecewise.
	(vect_analyze_slp_cost_1): Adjust.
	(vect_detect_hybrid_slp_stmts): Likewise.
	(vect_bb_slp_scalar_cost): Likewise.
	(vect_get_constant_vectors): For piecewise constructed
	constants place them after the last def.
	(vect_get_slp_defs): Adjust.
	* tree-vect-stmts.c (vect_is_simple_use): Detect in-BB
	externals for basic-block vectorization.

	* gfortran.dg/vect/pr62283-2.f: New testcase.
	* gcc.dg/vect/bb-slp-14.c: Adjust.

From-SVN: r222514
This commit is contained in:
Richard Biener 2015-04-28 08:30:44 +00:00 committed by Richard Biener
parent fde9b31b61
commit 90dd6e3df8
6 changed files with 111 additions and 24 deletions

View File

@ -1,3 +1,18 @@
2015-04-28 Richard Biener <rguenther@suse.de>
PR tree-optimization/62283
* tree-vect-slp.c (vect_build_slp_tree): When the SLP build
fails fatally and we are vectorizing a basic-block simply
cause the child to be constructed piecewise.
(vect_analyze_slp_cost_1): Adjust.
(vect_detect_hybrid_slp_stmts): Likewise.
(vect_bb_slp_scalar_cost): Likewise.
(vect_get_constant_vectors): For piecewise constructed
constants place them after the last def.
(vect_get_slp_defs): Adjust.
* tree-vect-stmts.c (vect_is_simple_use): Detect in-BB
externals for basic-block vectorization.
2015-04-28 Thomas Preud'homme <thomas.preudhomme@arm.com>
PR target/63503

View File

@ -1,3 +1,9 @@
2015-04-28 Richard Biener <rguenther@suse.de>
PR tree-optimization/62283
* gfortran.dg/vect/pr62283-2.f: New testcase.
* gcc.dg/vect/bb-slp-14.c: Adjust.
2015-04-28 Richard Biener <rguenther@suse.de>
PR tree-optimization/65851

View File

@ -14,7 +14,8 @@ main1 (unsigned int x, unsigned int y)
int i;
unsigned int a0, a1, a2, a3;
/* Not consecutive load with permutation - not supported. */
/* Not consecutive load with permutation - supported with building up
the vector from scalars. */
a0 = in[0] + 23;
a1 = in[1] + 142;
a2 = in[1] + 2;
@ -47,6 +48,6 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */
/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
/* { dg-final { cleanup-tree-dump "slp2" } } */

View File

@ -0,0 +1,13 @@
! { dg-do compile }
! { dg-require-effective-target vect_float }
! { dg-additional-options "-fdump-tree-slp2-details" }
! Compile-only test for PR 62283 (basic-block vectorization).
!
! SAXPY over four elements, written as straight-line code rather than
! as a loop: each statement updates one element of Y as
!   y(i) = y(i) + alpha*x(i)
! with the same scalar ALPHA used in all four statements.
!
! Arguments:
!   alpha  real scalar multiplier (read only)
!   x      real array of 4 elements (read only)
!   y      real array of 4 elements, updated in place
subroutine saxpy(alpha,x,y)
real x(4),y(4),alpha
y(1)=y(1)+alpha*x(1)
y(2)=y(2)+alpha*x(2)
y(3)=y(3)+alpha*x(3)
y(4)=y(4)+alpha*x(4)
end
! The test passes when the slp2 dump contains the text
! "basic block vectorized"; the dump files are removed afterwards.
! { dg-final { scan-tree-dump "basic block vectorized" "slp2" } }
! { dg-final { cleanup-tree-dump "slp2" } }
! { dg-final { cleanup-tree-dump "vect" } }

View File

@ -1017,6 +1017,29 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
continue;
}
/* If the SLP build failed fatally and we analyze a basic-block
simply treat nodes we fail to build as externally defined
(and thus build vectors from the scalar defs).
The cost model will reject outright expensive cases.
??? This doesn't treat cases where permutation ultimately
fails (or we don't try permutation below). Ideally we'd
even compute a permutation that will end up with the maximum
SLP tree size... */
if (bb_vinfo
&& !matches[0]
/* ??? Rejecting patterns this way doesn't work. We'd have to
do extra work to cancel the pattern so the uses see the
scalar version. */
&& !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
{
dump_printf_loc (MSG_NOTE, vect_location,
"Building vector operands from scalars\n");
oprnd_info->def_stmts = vNULL;
vect_free_slp_tree (child);
SLP_TREE_CHILDREN (*node).quick_push (NULL);
continue;
}
/* If the SLP build for operand zero failed and operand zero
and one can be commuted try that for the scalar stmts
that failed the match. */
@ -1417,9 +1440,10 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
/* Recurse down the SLP tree. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
instance, child, prologue_cost_vec,
ncopies_for_cost);
if (child)
vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
instance, child, prologue_cost_vec,
ncopies_for_cost);
/* Look at the first scalar stmt to determine the cost. */
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@ -1885,7 +1909,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
STMT_SLP_TYPE (stmt_vinfo) = hybrid;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
vect_detect_hybrid_slp_stmts (child, i, stype);
if (child)
vect_detect_hybrid_slp_stmts (child, i, stype);
}
/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
@ -2162,7 +2187,8 @@ vect_bb_slp_scalar_cost (basic_block bb,
}
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
if (child)
scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
return scalar_cost;
}
@ -2612,6 +2638,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
number_of_places_left_in_vector = nunits;
elts = XALLOCAVEC (tree, nunits);
bool place_after_defs = false;
for (j = 0; j < number_of_copies; j++)
{
for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
@ -2682,6 +2709,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
/* Create 'vect_ = {op0,op1,...,opn}'. */
number_of_places_left_in_vector--;
tree orig_op = op;
if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
{
if (CONSTANT_CLASS_P (op))
@ -2704,6 +2732,12 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
elts[number_of_places_left_in_vector] = op;
if (!CONSTANT_CLASS_P (op))
constant_p = false;
if (TREE_CODE (orig_op) == SSA_NAME
&& !SSA_NAME_IS_DEFAULT_DEF (orig_op)
&& STMT_VINFO_BB_VINFO (stmt_vinfo)
&& (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
== gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
place_after_defs = true;
if (number_of_places_left_in_vector == 0)
{
@ -2720,16 +2754,25 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
vec_cst = build_constructor (vector_type, v);
}
voprnds.quick_push (vect_init_vector (stmt, vec_cst,
vector_type, NULL));
tree init;
gimple_stmt_iterator gsi;
if (place_after_defs)
{
gsi = gsi_for_stmt
(vect_find_last_scalar_stmt_in_slp (slp_node));
init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
}
else
init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
if (ctor_seq != NULL)
{
gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
gsi_insert_seq_before_without_update (&gsi, ctor_seq,
GSI_SAME_STMT);
ctor_seq = NULL;
}
voprnds.quick_push (init);
place_after_defs = false;
}
}
}
@ -2825,20 +2868,26 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
child = SLP_TREE_CHILDREN (slp_node)[child_index];
/* We have to check both pattern and original def, if available. */
gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
|| (related
&& operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
if (child)
{
/* The number of vector defs is determined by the number of
vector statements in the node from which we get those
statements. */
number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
vectorized_defs = true;
child_index++;
gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
gimple related
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
|| (related
&& operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
{
/* The number of vector defs is determined by the number of
vector statements in the node from which we get those
statements. */
number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
vectorized_defs = true;
child_index++;
}
}
else
child_index++;
}
if (!vectorized_defs)

View File

@ -7752,7 +7752,10 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
else
{
stmt_vinfo = vinfo_for_stmt (*def_stmt);
*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
*dt = vect_external_def;
else
*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
}
if (dump_enabled_p ())