mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-19 03:30:27 +08:00
cost invariant nodes from vect_slp_analyze_node_operations SLP walk
2020-05-19 Richard Biener <rguenther@suse.de> * tree-vectorizer.h (_slp_tree::vectype): Add field. (SLP_TREE_VECTYPE): New. * tree-vect-slp.c (vect_create_new_slp_node): Initialize SLP_TREE_VECTYPE. (vect_create_new_slp_node): Likewise. (vect_prologue_cost_for_slp): Move here from tree-vect-stmts.c and simplify. (vect_slp_analyze_node_operations): Walk nodes children for invariant costing. (vect_get_constant_vectors): Use local scope op variable. * tree-vect-stmts.c (vect_prologue_cost_for_slp_op): Remove here. (vect_model_simple_cost): Adjust. (vect_model_store_cost): Likewise. (vectorizable_store): Likewise.
This commit is contained in:
parent
573e5f0500
commit
a4b48fc47c
@ -1,3 +1,20 @@
|
||||
2020-05-19 Richard Biener <rguenther@suse.de>
|
||||
|
||||
* tree-vectorizer.h (_slp_tree::vectype): Add field.
|
||||
(SLP_TREE_VECTYPE): New.
|
||||
* tree-vect-slp.c (vect_create_new_slp_node): Initialize
|
||||
SLP_TREE_VECTYPE.
|
||||
(vect_create_new_slp_node): Likewise.
|
||||
(vect_prologue_cost_for_slp): Move here from tree-vect-stmts.c
|
||||
and simplify.
|
||||
(vect_slp_analyze_node_operations): Walk nodes children for
|
||||
invariant costing.
|
||||
(vect_get_constant_vectors): Use local scope op variable.
|
||||
* tree-vect-stmts.c (vect_prologue_cost_for_slp_op): Remove here.
|
||||
(vect_model_simple_cost): Adjust.
|
||||
(vect_model_store_cost): Likewise.
|
||||
(vectorizable_store): Likewise.
|
||||
|
||||
2020-05-18 Martin Sebor <msebor@redhat.com>
|
||||
|
||||
PR middle-end/92815
|
||||
|
@ -129,6 +129,7 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
|
||||
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
|
||||
SLP_TREE_TWO_OPERATORS (node) = false;
|
||||
SLP_TREE_DEF_TYPE (node) = vect_internal_def;
|
||||
SLP_TREE_VECTYPE (node) = NULL_TREE;
|
||||
node->refcnt = 1;
|
||||
node->max_nunits = 1;
|
||||
|
||||
@ -155,6 +156,7 @@ vect_create_new_slp_node (vec<tree> ops)
|
||||
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
|
||||
SLP_TREE_TWO_OPERATORS (node) = false;
|
||||
SLP_TREE_DEF_TYPE (node) = vect_external_def;
|
||||
SLP_TREE_VECTYPE (node) = NULL_TREE;
|
||||
node->refcnt = 1;
|
||||
node->max_nunits = 1;
|
||||
|
||||
@ -2720,6 +2722,66 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Compute the prologue cost for invariant or constant operands represented
|
||||
by NODE. */
/* The cost is recorded into COST_VEC through record_stmt_cost with the
   vect_prologue kind, once per vector initializer that is examined.
   VINFO is only used to derive a vector type from the type of the first
   scalar operand when NODE has no SLP_TREE_VECTYPE set.  */
|
||||
|
||||
static void
|
||||
vect_prologue_cost_for_slp (vec_info *vinfo,
|
||||
slp_tree node,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
/* Without looking at the actual initializer a vector of
|
||||
constants can be implemented as load from the constant pool.
|
||||
When all elements are the same we can use a splat. */
|
||||
tree vectype = SLP_TREE_VECTYPE (node);
|
||||
/* ??? Ideally we'd want all invariant nodes to have a vectype. */
|
||||
if (!vectype)
|
||||
vectype = get_vectype_for_scalar_type (vinfo,
|
||||
TREE_TYPE (SLP_TREE_SCALAR_OPS
|
||||
(node)[0]), node);
|
||||
/* The group size is taken from the number of scalar operands of NODE.  */
unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
|
||||
unsigned num_vects_to_check;
|
||||
unsigned HOST_WIDE_INT const_nunits;
|
||||
unsigned nelt_limit;
|
||||
/* NELT_LIMIT is the number of operands that form one costed initializer,
   NUM_VECTS_TO_CHECK the number of such initializers walked below.  */
if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
|
||||
&& ! multiple_p (const_nunits, group_size))
|
||||
{
|
||||
/* Constant-length vector whose element count is not a multiple of
   the group size: examine each vector stmt of NODE separately,
   CONST_NUNITS operands at a time.  */
num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
|
||||
nelt_limit = const_nunits;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If either the vector has variable length or the vectors
|
||||
are composed of repeated whole groups we only need to
|
||||
cost construction once. All vectors will be the same. */
|
||||
num_vects_to_check = 1;
|
||||
nelt_limit = group_size;
|
||||
}
|
||||
tree elt = NULL_TREE;
|
||||
unsigned nelt = 0;
|
||||
/* Walk the scalar operands cyclically (the index wraps modulo GROUP_SIZE).
   ELT stays non-NULL only while every operand seen for the current
   initializer is identical to its first operand.  */
for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
|
||||
{
|
||||
unsigned si = j % group_size;
|
||||
if (nelt == 0)
|
||||
elt = SLP_TREE_SCALAR_OPS (node)[si];
|
||||
/* ??? We're just tracking whether all operands of a single
|
||||
vector initializer are the same, ideally we'd check if
|
||||
we emitted the same one already. */
|
||||
else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
|
||||
elt = NULL_TREE;
|
||||
nelt++;
|
||||
if (nelt == nelt_limit)
|
||||
{
|
||||
/* One initializer is complete.  External defs are costed as
   scalar_to_vec when all operands matched and as vec_construct
   otherwise; any other def type is costed as vector_load.  */
record_stmt_cost (cost_vec, 1,
|
||||
SLP_TREE_DEF_TYPE (node) == vect_external_def
|
||||
? (elt ? scalar_to_vec : vec_construct)
|
||||
: vector_load,
|
||||
NULL, vectype, 0, vect_prologue);
|
||||
/* Restart the tracking for the next initializer.  */
nelt = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Analyze statements contained in SLP tree NODE after recursively analyzing
|
||||
the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
|
||||
|
||||
@ -2735,6 +2797,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
|
||||
int i, j;
|
||||
slp_tree child;
|
||||
|
||||
/* Assume we can code-generate all invariants. */
|
||||
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
|
||||
return true;
|
||||
|
||||
@ -2798,6 +2861,26 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
|
||||
if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
|
||||
STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
|
||||
|
||||
/* When the node can be vectorized, cost invariant nodes it references.
|
||||
This is not done in DFS order to allow the referring node
|
||||
vectorizable_* calls to nail down the invariant nodes vector type
|
||||
and possibly unshare it if it needs a different vector type than
|
||||
other referrers. */
|
||||
if (res)
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
||||
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
|
||||
{
|
||||
/* ??? After auditing more code paths make a "default"
|
||||
and push the vector type from NODE to all children
|
||||
if it is not already set. */
|
||||
/* Perform usual caching, note code-generation still
|
||||
code-gens these nodes multiple times but we expect
|
||||
to CSE them later. */
|
||||
if (!visited.contains (child)
|
||||
&& !lvisited.add (child))
|
||||
vect_prologue_cost_for_slp (vinfo, child, cost_vec);
|
||||
}
|
||||
|
||||
/* If this node can't be vectorized, try pruning the tree here rather
|
||||
than felling the whole thing. */
|
||||
if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
|
||||
@ -3600,6 +3683,7 @@ vect_get_constant_vectors (vec_info *vinfo,
|
||||
stmt_vec_info insert_after = NULL;
|
||||
for (j = 0; j < number_of_copies; j++)
|
||||
{
|
||||
tree op;
|
||||
for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
|
||||
{
|
||||
/* Create 'vect_ = {op0,op1,...,opn}'. */
|
||||
|
@ -786,68 +786,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
|
||||
return opt_result::success ();
|
||||
}
|
||||
|
||||
/* Compute the prologue cost for invariant or constant operands. */
/* The operand is operand number OPNO of the scalar stmts of NODE and DT
   is its def type.  Costs are recorded into COST_VEC with the
   vect_prologue kind; the sum of the values returned by record_stmt_cost
   is returned.  */
|
||||
|
||||
static unsigned
|
||||
vect_prologue_cost_for_slp_op (vec_info *vinfo,
|
||||
slp_tree node,
|
||||
unsigned opno, enum vect_def_type dt,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
/* The first scalar stmt's operand OPNO supplies the scalar type used
   to pick the vector type below.  */
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
|
||||
tree op = gimple_op (stmt, opno);
|
||||
unsigned prologue_cost = 0;
|
||||
|
||||
/* Without looking at the actual initializer a vector of
|
||||
constants can be implemented as load from the constant pool.
|
||||
When all elements are the same we can use a splat. */
|
||||
tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
|
||||
/* The group size is taken from the number of scalar stmts of NODE.  */
unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
|
||||
unsigned num_vects_to_check;
|
||||
unsigned HOST_WIDE_INT const_nunits;
|
||||
unsigned nelt_limit;
|
||||
/* NELT_LIMIT is the number of operands that form one costed initializer,
   NUM_VECTS_TO_CHECK the number of such initializers walked below.  */
if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
|
||||
&& ! multiple_p (const_nunits, group_size))
|
||||
{
|
||||
/* Constant-length vector whose element count is not a multiple of
   the group size: examine each vector stmt of NODE separately,
   CONST_NUNITS operands at a time.  */
num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
|
||||
nelt_limit = const_nunits;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If either the vector has variable length or the vectors
|
||||
are composed of repeated whole groups we only need to
|
||||
cost construction once. All vectors will be the same. */
|
||||
num_vects_to_check = 1;
|
||||
nelt_limit = group_size;
|
||||
}
|
||||
tree elt = NULL_TREE;
|
||||
unsigned nelt = 0;
|
||||
/* Walk operand OPNO of the scalar stmts cyclically (the index wraps
   modulo GROUP_SIZE).  ELT stays non-NULL only while every operand seen
   for the current initializer is identical to its first operand.  */
for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
|
||||
{
|
||||
unsigned si = j % group_size;
|
||||
if (nelt == 0)
|
||||
elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
|
||||
/* ??? We're just tracking whether all operands of a single
|
||||
vector initializer are the same, ideally we'd check if
|
||||
we emitted the same one already. */
|
||||
else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
|
||||
opno))
|
||||
elt = NULL_TREE;
|
||||
nelt++;
|
||||
if (nelt == nelt_limit)
|
||||
{
|
||||
/* One initializer is complete.  With DT == vect_external_def it
   is costed as scalar_to_vec when all operands matched and as
   vec_construct otherwise; any other DT is costed as
   vector_load.  */
prologue_cost += record_stmt_cost
|
||||
(cost_vec, 1,
|
||||
dt == vect_external_def
|
||||
? (elt ? scalar_to_vec : vec_construct) : vector_load,
|
||||
NULL, vectype, 0, vect_prologue);
|
||||
/* Restart the tracking for the next initializer.  */
nelt = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return prologue_cost;
|
||||
}
|
||||
|
||||
/* Function vect_model_simple_cost.
|
||||
|
||||
Models cost for simple operations, i.e. those that only emit ncopies of a
|
||||
@ -855,7 +793,7 @@ vect_prologue_cost_for_slp_op (vec_info *vinfo,
|
||||
be generated for the single vector op. We will handle that shortly. */
|
||||
|
||||
static void
|
||||
vect_model_simple_cost (vec_info *vinfo,
|
||||
vect_model_simple_cost (vec_info *,
|
||||
stmt_vec_info stmt_info, int ncopies,
|
||||
enum vect_def_type *dt,
|
||||
int ndts,
|
||||
@ -871,26 +809,7 @@ vect_model_simple_cost (vec_info *vinfo,
|
||||
if (node)
|
||||
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
|
||||
|
||||
if (node)
|
||||
{
|
||||
/* Scan operands and account for prologue cost of constants/externals.
|
||||
??? This over-estimates cost for multiple uses and should be
|
||||
re-engineered. */
|
||||
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
|
||||
tree lhs = gimple_get_lhs (stmt);
|
||||
for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
|
||||
{
|
||||
tree op = gimple_op (stmt, i);
|
||||
enum vect_def_type dt;
|
||||
if (!op || op == lhs)
|
||||
continue;
|
||||
if (vect_is_simple_use (op, vinfo, &dt)
|
||||
&& (dt == vect_constant_def || dt == vect_external_def))
|
||||
prologue_cost += vect_prologue_cost_for_slp_op (vinfo, node,
|
||||
i, dt, cost_vec);
|
||||
}
|
||||
}
|
||||
else
|
||||
if (!node)
|
||||
/* Cost the "broadcast" of a scalar operand into a vector operand.
|
||||
Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
|
||||
cost model. */
|
||||
@ -991,7 +910,6 @@ cfun_returns (tree decl)
|
||||
|
||||
static void
|
||||
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
|
||||
enum vect_def_type dt,
|
||||
vect_memory_access_type memory_access_type,
|
||||
vec_load_store_type vls_type, slp_tree slp_node,
|
||||
stmt_vector_for_cost *cost_vec)
|
||||
@ -1006,10 +924,7 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
|
||||
|
||||
if (vls_type == VLS_STORE_INVARIANT)
|
||||
{
|
||||
if (slp_node)
|
||||
prologue_cost += vect_prologue_cost_for_slp_op (vinfo, slp_node,
|
||||
1, dt, cost_vec);
|
||||
else
|
||||
if (!slp_node)
|
||||
prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
|
||||
stmt_info, 0, vect_prologue);
|
||||
}
|
||||
@ -7565,7 +7480,7 @@ vectorizable_store (vec_info *vinfo,
|
||||
memory_access_type, &gs_info, mask);
|
||||
|
||||
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
|
||||
vect_model_store_cost (vinfo, stmt_info, ncopies, rhs_dt,
|
||||
vect_model_store_cost (vinfo, stmt_info, ncopies,
|
||||
memory_access_type, vls_type, slp_node, cost_vec);
|
||||
return true;
|
||||
}
|
||||
|
@ -130,6 +130,7 @@ struct _slp_tree {
|
||||
permutation. */
|
||||
vec<unsigned> load_permutation;
|
||||
|
||||
tree vectype;
|
||||
/* Vectorized stmt/s. */
|
||||
vec<stmt_vec_info> vec_stmts;
|
||||
/* Number of vector stmts that are created to replace the group of scalar
|
||||
@ -186,6 +187,7 @@ public:
|
||||
#define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
|
||||
#define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators
|
||||
#define SLP_TREE_DEF_TYPE(S) (S)->def_type
|
||||
#define SLP_TREE_VECTYPE(S) (S)->vectype
|
||||
|
||||
/* Key for map that records association between
|
||||
scalar conditions and corresponding loop mask, and
|
||||
|
Loading…
x
Reference in New Issue
Block a user