Rewrite SLP induction vectorization

This rewrites SLP induction vectorization to handle different
inductions in the different SLP lanes.  It also changes SLP
build to represent the initial value (but not the cycle) so
it can be enhanced to handle outer loop vectorization later.

Note this FAILs gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c
because it removes one CSE optimization that no longer works
with non-uniform initial value and step.  I'll see to recover
from this after outer loop vectorization of inductions works.

It might be a bit friendlier to variable-size vectors now
but then we're now building the step vector from scalars ...

2020-11-02  Richard Biener  <rguenther@suse.de>

	* tree.h (build_real_from_wide): Declare.
	* tree.c (build_real_from_wide): New function.
	* tree-vect-slp.c (vect_build_slp_tree_2): Remove
	restriction on induction vectorization, represent
	the initial value.
	* tree-vect-loop.c (vect_model_induction_cost): Inline ...
	(vectorizable_induction): ... here.  Rewrite SLP
	code generation.

	* gcc.dg/vect/slp-49.c: New testcase.
This commit is contained in:
Richard Biener 2020-11-02 12:38:04 +01:00
parent 86deadf8d3
commit e881774d0d
5 changed files with 258 additions and 183 deletions

View File

@ -0,0 +1,38 @@
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
int a[1024];
void __attribute__((noipa))
foo(int k)
{
int j = 5;
for (int i = 0; i < 512; ++i)
{
a[2*i] = j;
a[2*i+1] = k;
j++;
k+=3;
}
}
int
main()
{
check_vect ();
foo (17);
for (int i = 0; i < 512; ++i)
{
if (a[2*i] != 5 + i
|| a[2*i+1] != 17 + 3 * i)
__builtin_abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */

View File

@ -4443,34 +4443,6 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
}
/* Function vect_model_induction_cost.
Models cost for induction operations. */
static void
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies,
stmt_vector_for_cost *cost_vec)
{
unsigned inside_cost, prologue_cost;
if (PURE_SLP_STMT (stmt_info))
return;
/* loop cost for vec_loop. */
inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
stmt_info, 0, vect_body);
/* prologue cost for vec_init and vec_step. */
prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
stmt_info, 0, vect_prologue);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vect_model_induction_cost: inside_cost = %d, "
"prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function get_initial_def_for_reduction
@ -7796,7 +7768,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (slp_node && !nunits.is_constant ())
{
/* The current SLP code creates the initial value element-by-element. */
/* The current SLP code creates the step value element-by-element. */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"SLP induction not supported for variable-length"
@ -7806,9 +7778,46 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (!vec_stmt) /* transformation not required. */
{
unsigned inside_cost = 0, prologue_cost = 0;
if (slp_node)
{
/* We eventually need to set a vector type on invariant
arguments. */
unsigned j;
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
if (!vect_maybe_update_slp_op_vectype
(child, SLP_TREE_VECTYPE (slp_node)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"incompatible vector types for "
"invariants\n");
return false;
}
/* loop cost for vec_loop. */
inside_cost
= record_stmt_cost (cost_vec,
SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
vector_stmt, stmt_info, 0, vect_body);
}
else /* if (!slp_node) */
{
/* loop cost for vec_loop. */
inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
stmt_info, 0, vect_body);
/* prologue cost for vec_init and vec_step. */
prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
stmt_info, 0, vect_prologue);
}
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vect_model_induction_cost: inside_cost = %d, "
"prologue_cost = %d .\n", inside_cost,
prologue_cost);
STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_induction");
vect_model_induction_cost (stmt_info, ncopies, cost_vec);
return true;
}
@ -7827,6 +7836,164 @@ vectorizable_induction (loop_vec_info loop_vinfo,
tree step_vectype = get_same_sized_vectype (TREE_TYPE (step_expr), vectype);
pe = loop_preheader_edge (iv_loop);
/* Find the first insertion point in the BB. */
basic_block bb = gimple_bb (phi);
si = gsi_after_labels (bb);
/* For SLP induction we have to generate several IVs as for example
with group size 3 we need
[i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1]
[i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */
if (slp_node)
{
/* Enforced above. */
unsigned int const_nunits = nunits.to_constant ();
/* The initial values are vectorized, but any lanes > group_size
need adjustment. */
slp_tree init_node
= SLP_TREE_CHILDREN (slp_node)[pe->dest_idx];
/* Gather steps. Since we do not vectorize inductions as
cycles we have to reconstruct the step from SCEV data. */
unsigned group_size = SLP_TREE_LANES (slp_node);
tree *steps = XALLOCAVEC (tree, group_size);
stmt_vec_info phi_info;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
/* Now generate the IVs. */
unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
gcc_assert ((const_nunits * nvects) % group_size == 0);
unsigned nivs = least_common_multiple (group_size,
const_nunits) / const_nunits;
unsigned lup_mul = (nvects * const_nunits) / group_size;
tree stept = TREE_TYPE (step_vectype);
tree lupdate_mul
= build_vector_from_val (step_vectype,
SCALAR_FLOAT_TYPE_P (stept)
? build_real_from_wide (stept, lup_mul,
UNSIGNED)
: build_int_cstu (stept, lup_mul));
unsigned ivn;
auto_vec<tree> vec_steps;
for (ivn = 0; ivn < nivs; ++ivn)
{
tree_vector_builder elts (step_vectype, const_nunits, 1);
tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
{
tree elt = steps[(ivn*const_nunits + eltn) % group_size];
elts.quick_push (elt);
unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
? build_real_from_wide (stept,
mul_elt, UNSIGNED)
: build_int_cstu (stept, mul_elt));
}
stmts = NULL;
vec_step = gimple_build_vector (&stmts, &elts);
vec_step = gimple_convert (&stmts, step_vectype, vec_step);
vec_steps.safe_push (vec_step);
tree step_mul = gimple_build_vector (&stmts, &mul_elts);
if (stmts)
{
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
}
/* Create the induction-phi that defines the induction-operand. */
vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
"vec_iv_");
induction_phi = create_phi_node (vec_dest, iv_loop->header);
induc_def = PHI_RESULT (induction_phi);
/* Create the iv update inside the loop */
stmts = NULL;
tree up = gimple_build (&stmts, MULT_EXPR, step_vectype,
vec_step, lupdate_mul);
if (stmts)
{
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
}
stmts = NULL;
vec_def = gimple_convert (&stmts, step_vectype, induc_def);
vec_def = gimple_build (&stmts,
PLUS_EXPR, step_vectype, vec_def, up);
vec_def = gimple_convert (&stmts, vectype, vec_def);
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
UNKNOWN_LOCATION);
vec_init = vect_get_slp_vect_def (init_node, ivn);
if (!integer_zerop (step_mul))
{
stmts = NULL;
vec_def = gimple_convert (&stmts, step_vectype, vec_init);
up = gimple_build (&stmts, MULT_EXPR, step_vectype,
vec_step, step_mul);
vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype,
vec_def, up);
vec_init = gimple_convert (&stmts, vectype, vec_def);
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
}
/* Set the arguments of the phi node: */
add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi);
}
/* Re-use IVs when we can. We are generating further vector
stmts by adding VF' * stride to the IVs generated above. */
if (ivn < nvects)
{
unsigned vfp
= least_common_multiple (group_size, const_nunits) / group_size;
tree lupdate_mul
= build_vector_from_val (step_vectype,
SCALAR_FLOAT_TYPE_P (stept)
? build_real_from_wide (stept,
vfp, UNSIGNED)
: build_int_cstu (stept, vfp));
for (; ivn < nvects; ++ivn)
{
gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs];
tree def = gimple_get_lhs (iv);
gimple_seq stmts = NULL;
if (ivn < 2*nivs)
{
vec_steps[ivn - nivs]
= gimple_build (&stmts, MULT_EXPR, step_vectype,
vec_steps[ivn - nivs], lupdate_mul);
if (stmts)
{
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
}
}
stmts = NULL;
def = gimple_convert (&stmts, step_vectype, def);
def = gimple_build (&stmts, PLUS_EXPR, step_vectype,
def, vec_steps[ivn % nivs]);
def = gimple_convert (&stmts, vectype, def);
if (gimple_code (iv) == GIMPLE_PHI)
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
else
{
gimple_stmt_iterator tgsi = gsi_for_stmt (iv);
gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING);
}
SLP_TREE_VEC_STMTS (slp_node)
.quick_push (SSA_NAME_DEF_STMT (def));
}
}
return true;
}
init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
loop_preheader_edge (iv_loop));
@ -7860,147 +8027,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
gcc_assert (!new_bb);
}
/* Find the first insertion point in the BB. */
basic_block bb = gimple_bb (phi);
si = gsi_after_labels (bb);
/* For SLP induction we have to generate several IVs as for example
with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S]
[i + 2*S, i + 3*S, i + 3*S, i + 3*S]. The step is the same uniform
[VF*S, VF*S, VF*S, VF*S] for all. */
if (slp_node)
{
/* Enforced above. */
unsigned int const_nunits = nunits.to_constant ();
/* Generate [VF*S, VF*S, ... ]. */
if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
{
expr = build_int_cst (integer_type_node, vf);
expr = fold_convert (TREE_TYPE (step_expr), expr);
}
else
expr = build_int_cst (TREE_TYPE (step_expr), vf);
new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
expr, step_expr);
if (! CONSTANT_CLASS_P (new_name))
new_name = vect_init_vector (loop_vinfo, stmt_info, new_name,
TREE_TYPE (step_expr), NULL);
new_vec = build_vector_from_val (step_vectype, new_name);
vec_step = vect_init_vector (loop_vinfo, stmt_info,
new_vec, step_vectype, NULL);
/* Now generate the IVs. */
unsigned group_size = SLP_TREE_LANES (slp_node);
unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
unsigned elts = const_nunits * nvects;
/* Compute the number of distinct IVs we need. First reduce
group_size if it is a multiple of const_nunits so we get
one IV for a group_size of 4 but const_nunits 2. */
unsigned group_sizep = group_size;
if (group_sizep % const_nunits == 0)
group_sizep = group_sizep / const_nunits;
unsigned nivs = least_common_multiple (group_sizep,
const_nunits) / const_nunits;
gcc_assert (elts % group_size == 0);
tree elt = init_expr;
unsigned ivn;
for (ivn = 0; ivn < nivs; ++ivn)
{
tree_vector_builder elts (step_vectype, const_nunits, 1);
stmts = NULL;
for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
{
if (ivn*const_nunits + eltn >= group_size
&& (ivn * const_nunits + eltn) % group_size == 0)
elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt),
elt, step_expr);
elts.quick_push (elt);
}
vec_init = gimple_build_vector (&stmts, &elts);
vec_init = gimple_convert (&stmts, vectype, vec_init);
if (stmts)
{
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
}
/* Create the induction-phi that defines the induction-operand. */
vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
induction_phi = create_phi_node (vec_dest, iv_loop->header);
induc_def = PHI_RESULT (induction_phi);
/* Create the iv update inside the loop */
gimple_seq stmts = NULL;
vec_def = gimple_convert (&stmts, step_vectype, induc_def);
vec_def = gimple_build (&stmts,
PLUS_EXPR, step_vectype, vec_def, vec_step);
vec_def = gimple_convert (&stmts, vectype, vec_def);
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
/* Set the arguments of the phi node: */
add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
UNKNOWN_LOCATION);
SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi);
}
/* Fill up to the number of vectors we need for the whole group. */
nivs = least_common_multiple (group_size,
const_nunits) / const_nunits;
for (; ivn < nivs; ++ivn)
SLP_TREE_VEC_STMTS (slp_node)
.quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]);
/* Re-use IVs when we can. */
if (ivn < nvects)
{
unsigned vfp
= least_common_multiple (group_size, const_nunits) / group_size;
/* Generate [VF'*S, VF'*S, ... ]. */
if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
{
expr = build_int_cst (integer_type_node, vfp);
expr = fold_convert (TREE_TYPE (step_expr), expr);
}
else
expr = build_int_cst (TREE_TYPE (step_expr), vfp);
new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
expr, step_expr);
if (! CONSTANT_CLASS_P (new_name))
new_name = vect_init_vector (loop_vinfo, stmt_info, new_name,
TREE_TYPE (step_expr), NULL);
new_vec = build_vector_from_val (step_vectype, new_name);
vec_step = vect_init_vector (loop_vinfo, stmt_info, new_vec,
step_vectype, NULL);
for (; ivn < nvects; ++ivn)
{
gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs];
tree def;
if (gimple_code (iv) == GIMPLE_PHI)
def = gimple_phi_result (iv);
else
def = gimple_assign_lhs (iv);
gimple_seq stmts = NULL;
def = gimple_convert (&stmts, step_vectype, def);
def = gimple_build (&stmts,
PLUS_EXPR, step_vectype, def, vec_step);
def = gimple_convert (&stmts, vectype, def);
if (gimple_code (iv) == GIMPLE_PHI)
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
else
{
gimple_stmt_iterator tgsi = gsi_for_stmt (iv);
gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING);
}
SLP_TREE_VEC_STMTS (slp_node)
.quick_push (SSA_NAME_DEF_STMT (def));
}
}
return true;
}
/* Create the vector that holds the initial_value of the induction. */
if (nested_in_vect_loop)
{

View File

@ -1441,20 +1441,14 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
return NULL;
vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
/* Induction from different IVs is not supported. */
if (def_type == vect_induction_def)
{
stmt_vec_info other_info;
FOR_EACH_VEC_ELT (stmts, i, other_info)
if (stmt_info != other_info)
return NULL;
/* Induction PHIs are leafs. */
(*tree_size)++;
node = vect_create_new_slp_node (node, stmts, nops);
SLP_TREE_VECTYPE (node) = vectype;
SLP_TREE_CHILDREN (node).quick_grow_cleared (nops);
return node;
/* Induction PHIs are not cycles but walk the initial
value. */
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
if (nested_in_vect_loop_p (loop, stmt_info))
loop = loop->inner;
skip_args[loop_latch_edge (loop)->dest_idx] = true;
}
else if (def_type == vect_reduction_def
|| def_type == vect_double_reduction_def

View File

@ -2250,6 +2250,22 @@ build_real_from_int_cst (tree type, const_tree i)
return v;
}
/* Return a new REAL_CST node whose type is TYPE
and whose value is the integer value I which has sign SGN. */
tree
build_real_from_wide (tree type, const wide_int_ref &i, signop sgn)
{
REAL_VALUE_TYPE d;
/* Clear all bits of the real value type so that we can later do
bitwise comparisons to see if two values are the same. */
memset (&d, 0, sizeof d);
real_from_integer (&d, TYPE_MODE (type), i, sgn);
return build_real (type, d);
}
/* Return a newly constructed STRING_CST node whose value is the LEN
characters at STR when STR is nonnull, or all zeros otherwise.
Note that for a C string literal, LEN should include the trailing NUL.

View File

@ -4429,6 +4429,7 @@ extern tree build_constructor_from_vec (tree, const vec<tree, va_gc> *);
extern tree build_constructor_va (tree, int, ...);
extern tree build_clobber (tree);
extern tree build_real_from_int_cst (tree, const_tree);
extern tree build_real_from_wide (tree, const wide_int_ref &, signop);
extern tree build_complex (tree, tree, tree);
extern tree build_complex_inf (tree, bool);
extern tree build_each_one_cst (tree);