cost invariant nodes from vect_slp_analyze_node_operations SLP walk

2020-05-19  Richard Biener  <rguenther@suse.de>

	* tree-vectorizer.h (_slp_tree::vectype): Add field.
	(SLP_TREE_VECTYPE): New.
	* tree-vect-slp.c (vect_create_new_slp_node): Initialize
	SLP_TREE_VECTYPE.
	(vect_create_new_slp_node): Likewise.
	(vect_prologue_cost_for_slp): Move here from tree-vect-stmts.c
	and simplify.
	(vect_slp_analyze_node_operations): Walk nodes children for
	invariant costing.
	(vect_get_constant_vectors): Use local scope op variable.
	* tree-vect-stmts.c (vect_prologue_cost_for_slp_op): Remove here.
	(vect_model_simple_cost): Adjust.
	(vect_model_store_cost): Likewise.
	(vectorizable_store): Likewise.
This commit is contained in:
Richard Biener 2020-05-18 16:05:00 +02:00
parent 573e5f0500
commit a4b48fc47c
4 changed files with 107 additions and 89 deletions

View File

@ -1,3 +1,20 @@
2020-05-19 Richard Biener <rguenther@suse.de>
* tree-vectorizer.h (_slp_tree::vectype): Add field.
(SLP_TREE_VECTYPE): New.
* tree-vect-slp.c (vect_create_new_slp_node): Initialize
SLP_TREE_VECTYPE.
(vect_create_new_slp_node): Likewise.
(vect_prologue_cost_for_slp): Move here from tree-vect-stmts.c
and simplify.
(vect_slp_analyze_node_operations): Walk nodes children for
invariant costing.
(vect_get_constant_vectors): Use local scope op variable.
* tree-vect-stmts.c (vect_prologue_cost_for_slp_op): Remove here.
(vect_model_simple_cost): Adjust.
(vect_model_store_cost): Likewise.
(vectorizable_store): Likewise.
2020-05-18 Martin Sebor <msebor@redhat.com>
PR middle-end/92815

View File

@ -129,6 +129,7 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
SLP_TREE_TWO_OPERATORS (node) = false;
SLP_TREE_DEF_TYPE (node) = vect_internal_def;
SLP_TREE_VECTYPE (node) = NULL_TREE;
node->refcnt = 1;
node->max_nunits = 1;
@ -155,6 +156,7 @@ vect_create_new_slp_node (vec<tree> ops)
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
SLP_TREE_TWO_OPERATORS (node) = false;
SLP_TREE_DEF_TYPE (node) = vect_external_def;
SLP_TREE_VECTYPE (node) = NULL_TREE;
node->refcnt = 1;
node->max_nunits = 1;
@ -2720,6 +2722,66 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
return true;
}
/* Compute the prologue cost for invariant or constant operands represented
by NODE. */
static void
vect_prologue_cost_for_slp (vec_info *vinfo,
slp_tree node,
stmt_vector_for_cost *cost_vec)
{
/* Without looking at the actual initializer a vector of
constants can be implemented as load from the constant pool.
When all elements are the same we can use a splat. */
tree vectype = SLP_TREE_VECTYPE (node);
/* ??? Ideally we'd want all invariant nodes to have a vectype. */
if (!vectype)
vectype = get_vectype_for_scalar_type (vinfo,
TREE_TYPE (SLP_TREE_SCALAR_OPS
(node)[0]), node);
unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
unsigned num_vects_to_check;
unsigned HOST_WIDE_INT const_nunits;
unsigned nelt_limit;
if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
&& ! multiple_p (const_nunits, group_size))
{
num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
nelt_limit = const_nunits;
}
else
{
/* If either the vector has variable length or the vectors
are composed of repeated whole groups we only need to
cost construction once. All vectors will be the same. */
num_vects_to_check = 1;
nelt_limit = group_size;
}
tree elt = NULL_TREE;
unsigned nelt = 0;
for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
{
unsigned si = j % group_size;
if (nelt == 0)
elt = SLP_TREE_SCALAR_OPS (node)[si];
/* ??? We're just tracking whether all operands of a single
vector initializer are the same, ideally we'd check if
we emitted the same one already. */
else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
elt = NULL_TREE;
nelt++;
if (nelt == nelt_limit)
{
record_stmt_cost (cost_vec, 1,
SLP_TREE_DEF_TYPE (node) == vect_external_def
? (elt ? scalar_to_vec : vec_construct)
: vector_load,
NULL, vectype, 0, vect_prologue);
nelt = 0;
}
}
}
/* Analyze statements contained in SLP tree NODE after recursively analyzing
the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
@ -2735,6 +2797,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
int i, j;
slp_tree child;
/* Assume we can code-generate all invariants. */
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return true;
@ -2798,6 +2861,26 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
/* When the node can be vectorized cost invariant nodes it references.
This is not done in DFS order to allow the refering node
vectorizable_* calls to nail down the invariant nodes vector type
and possibly unshare it if it needs a different vector type than
other referrers. */
if (res)
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
{
/* ??? After auditing more code paths make a "default"
and push the vector type from NODE to all children
if it is not already set. */
/* Perform usual caching, note code-generation still
code-gens these nodes multiple times but we expect
to CSE them later. */
if (!visited.contains (child)
&& !lvisited.add (child))
vect_prologue_cost_for_slp (vinfo, child, cost_vec);
}
/* If this node can't be vectorized, try pruning the tree here rather
than felling the whole thing. */
if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
@ -3600,6 +3683,7 @@ vect_get_constant_vectors (vec_info *vinfo,
stmt_vec_info insert_after = NULL;
for (j = 0; j < number_of_copies; j++)
{
tree op;
for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
{
/* Create 'vect_ = {op0,op1,...,opn}'. */

View File

@ -786,68 +786,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
return opt_result::success ();
}
/* Compute the prologue cost for invariant or constant operands. */
static unsigned
vect_prologue_cost_for_slp_op (vec_info *vinfo,
slp_tree node,
unsigned opno, enum vect_def_type dt,
stmt_vector_for_cost *cost_vec)
{
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
tree op = gimple_op (stmt, opno);
unsigned prologue_cost = 0;
/* Without looking at the actual initializer a vector of
constants can be implemented as load from the constant pool.
When all elements are the same we can use a splat. */
tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
unsigned num_vects_to_check;
unsigned HOST_WIDE_INT const_nunits;
unsigned nelt_limit;
if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
&& ! multiple_p (const_nunits, group_size))
{
num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
nelt_limit = const_nunits;
}
else
{
/* If either the vector has variable length or the vectors
are composed of repeated whole groups we only need to
cost construction once. All vectors will be the same. */
num_vects_to_check = 1;
nelt_limit = group_size;
}
tree elt = NULL_TREE;
unsigned nelt = 0;
for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
{
unsigned si = j % group_size;
if (nelt == 0)
elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
/* ??? We're just tracking whether all operands of a single
vector initializer are the same, ideally we'd check if
we emitted the same one already. */
else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
opno))
elt = NULL_TREE;
nelt++;
if (nelt == nelt_limit)
{
prologue_cost += record_stmt_cost
(cost_vec, 1,
dt == vect_external_def
? (elt ? scalar_to_vec : vec_construct) : vector_load,
NULL, vectype, 0, vect_prologue);
nelt = 0;
}
}
return prologue_cost;
}
/* Function vect_model_simple_cost.
Models cost for simple operations, i.e. those that only emit ncopies of a
@ -855,7 +793,7 @@ vect_prologue_cost_for_slp_op (vec_info *vinfo,
be generated for the single vector op. We will handle that shortly. */
static void
vect_model_simple_cost (vec_info *vinfo,
vect_model_simple_cost (vec_info *,
stmt_vec_info stmt_info, int ncopies,
enum vect_def_type *dt,
int ndts,
@ -871,26 +809,7 @@ vect_model_simple_cost (vec_info *vinfo,
if (node)
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
if (node)
{
/* Scan operands and account for prologue cost of constants/externals.
??? This over-estimates cost for multiple uses and should be
re-engineered. */
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
tree lhs = gimple_get_lhs (stmt);
for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
{
tree op = gimple_op (stmt, i);
enum vect_def_type dt;
if (!op || op == lhs)
continue;
if (vect_is_simple_use (op, vinfo, &dt)
&& (dt == vect_constant_def || dt == vect_external_def))
prologue_cost += vect_prologue_cost_for_slp_op (vinfo, node,
i, dt, cost_vec);
}
}
else
if (!node)
/* Cost the "broadcast" of a scalar operand in to a vector operand.
Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
cost model. */
@ -991,7 +910,6 @@ cfun_returns (tree decl)
static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
enum vect_def_type dt,
vect_memory_access_type memory_access_type,
vec_load_store_type vls_type, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
@ -1006,10 +924,7 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
if (vls_type == VLS_STORE_INVARIANT)
{
if (slp_node)
prologue_cost += vect_prologue_cost_for_slp_op (vinfo, slp_node,
1, dt, cost_vec);
else
if (!slp_node)
prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
stmt_info, 0, vect_prologue);
}
@ -7565,7 +7480,7 @@ vectorizable_store (vec_info *vinfo,
memory_access_type, &gs_info, mask);
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
vect_model_store_cost (vinfo, stmt_info, ncopies, rhs_dt,
vect_model_store_cost (vinfo, stmt_info, ncopies,
memory_access_type, vls_type, slp_node, cost_vec);
return true;
}

View File

@ -130,6 +130,7 @@ struct _slp_tree {
permutation. */
vec<unsigned> load_permutation;
tree vectype;
/* Vectorized stmt/s. */
vec<stmt_vec_info> vec_stmts;
/* Number of vector stmts that are created to replace the group of scalar
@ -186,6 +187,7 @@ public:
#define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
#define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators
#define SLP_TREE_DEF_TYPE(S) (S)->def_type
#define SLP_TREE_VECTYPE(S) (S)->vectype
/* Key for map that records association between
scalar conditions and corresponding loop mask, and