mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 13:30:58 +08:00
re PR tree-optimization/88315 (SAD and DOT_PROD SLP reductions with initial value != 0 create wrong code)
2018-12-04 Richard Biener <rguenther@suse.de> PR tree-optimization/88315 * tree-vect-loop.c (get_initial_defs_for_reduction): Simplify and fix initialization vector for SAD and DOT_PROD SLP reductions. * gcc.dg/vect/slp-reduc-sad.c: Adjust to provide non-trivial initial value. From-SVN: r266771
This commit is contained in:
parent
4b8caa08fe
commit
056775650a
@ -1,3 +1,9 @@
|
||||
2018-12-04 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/88315
|
||||
* tree-vect-loop.c (get_initial_defs_for_reduction): Simplify
|
||||
and fix initialization vector for SAD and DOT_PROD SLP reductions.
|
||||
|
||||
2018-12-03 Sandra Loosemore <sandra@codesourcery.com>
|
||||
|
||||
PR c/59039
|
||||
|
@ -1,3 +1,9 @@
|
||||
2018-12-04 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/88315
|
||||
* gcc.dg/vect/slp-reduc-sad.c: Adjust to provide non-trivial
|
||||
initial value.
|
||||
|
||||
2018-12-03 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR middle-end/64242
|
||||
|
@ -12,7 +12,7 @@ extern void abort (void);
|
||||
int __attribute__((noinline,noclone))
|
||||
foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2)
|
||||
{
|
||||
int i_sum = 0;
|
||||
int i_sum = 5;
|
||||
for( int y = 0; y < 16; y++ )
|
||||
{
|
||||
i_sum += abs ( pix1[0] - pix2[0] );
|
||||
@ -52,7 +52,7 @@ main ()
|
||||
__asm__ volatile ("");
|
||||
}
|
||||
|
||||
if (foo (X, Y, 16) != 32512)
|
||||
if (foo (X, Y, 16) != 32512 + 5)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
|
@ -4100,12 +4100,8 @@ get_initial_defs_for_reduction (slp_tree slp_node,
|
||||
unsigned HOST_WIDE_INT nunits;
|
||||
unsigned j, number_of_places_left_in_vector;
|
||||
tree vector_type;
|
||||
tree vop;
|
||||
int group_size = stmts.length ();
|
||||
unsigned int vec_num, i;
|
||||
unsigned number_of_copies = 1;
|
||||
vec<tree> voprnds;
|
||||
voprnds.create (number_of_vectors);
|
||||
unsigned int group_size = stmts.length ();
|
||||
unsigned int i;
|
||||
struct loop *loop;
|
||||
auto_vec<tree, 16> permute_results;
|
||||
|
||||
@ -4138,115 +4134,78 @@ get_initial_defs_for_reduction (slp_tree slp_node,
|
||||
if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
|
||||
nunits = group_size;
|
||||
|
||||
number_of_copies = nunits * number_of_vectors / group_size;
|
||||
|
||||
number_of_places_left_in_vector = nunits;
|
||||
bool constant_p = true;
|
||||
tree_vector_builder elts (vector_type, nunits, 1);
|
||||
elts.quick_grow (nunits);
|
||||
for (j = 0; j < number_of_copies; j++)
|
||||
for (j = 0; j < nunits * number_of_vectors; ++j)
|
||||
{
|
||||
for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
|
||||
{
|
||||
tree op;
|
||||
/* Get the def before the loop. In reduction chain we have only
|
||||
one initial value. */
|
||||
if ((j != (number_of_copies - 1)
|
||||
|| (reduc_chain && i != 0))
|
||||
&& neutral_op)
|
||||
op = neutral_op;
|
||||
else
|
||||
op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
|
||||
tree op;
|
||||
i = j % group_size;
|
||||
stmt_vinfo = stmts[i];
|
||||
|
||||
/* Create 'vect_ = {op0,op1,...,opn}'. */
|
||||
number_of_places_left_in_vector--;
|
||||
elts[number_of_places_left_in_vector] = op;
|
||||
if (!CONSTANT_CLASS_P (op))
|
||||
constant_p = false;
|
||||
|
||||
if (number_of_places_left_in_vector == 0)
|
||||
{
|
||||
gimple_seq ctor_seq = NULL;
|
||||
tree init;
|
||||
if (constant_p && !neutral_op
|
||||
? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
|
||||
: known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
|
||||
/* Build the vector directly from ELTS. */
|
||||
init = gimple_build_vector (&ctor_seq, &elts);
|
||||
else if (neutral_op)
|
||||
{
|
||||
/* Build a vector of the neutral value and shift the
|
||||
other elements into place. */
|
||||
init = gimple_build_vector_from_val (&ctor_seq, vector_type,
|
||||
neutral_op);
|
||||
int k = nunits;
|
||||
while (k > 0 && elts[k - 1] == neutral_op)
|
||||
k -= 1;
|
||||
while (k > 0)
|
||||
{
|
||||
k -= 1;
|
||||
init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
|
||||
vector_type, init, elts[k]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First time round, duplicate ELTS to fill the
|
||||
required number of vectors, then cherry pick the
|
||||
appropriate result for each iteration. */
|
||||
if (vec_oprnds->is_empty ())
|
||||
duplicate_and_interleave (&ctor_seq, vector_type, elts,
|
||||
number_of_vectors,
|
||||
permute_results);
|
||||
init = permute_results[number_of_vectors - j - 1];
|
||||
}
|
||||
if (ctor_seq != NULL)
|
||||
gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
|
||||
voprnds.quick_push (init);
|
||||
|
||||
number_of_places_left_in_vector = nunits;
|
||||
elts.new_vector (vector_type, nunits, 1);
|
||||
elts.quick_grow (nunits);
|
||||
constant_p = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Since the vectors are created in the reverse order, we should invert
|
||||
them. */
|
||||
vec_num = voprnds.length ();
|
||||
for (j = vec_num; j != 0; j--)
|
||||
{
|
||||
vop = voprnds[j - 1];
|
||||
vec_oprnds->quick_push (vop);
|
||||
}
|
||||
|
||||
voprnds.release ();
|
||||
|
||||
/* In case that VF is greater than the unrolling factor needed for the SLP
|
||||
group of stmts, NUMBER_OF_VECTORS to be created is greater than
|
||||
NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
|
||||
to replicate the vectors. */
|
||||
tree neutral_vec = NULL;
|
||||
while (number_of_vectors > vec_oprnds->length ())
|
||||
{
|
||||
if (neutral_op)
|
||||
{
|
||||
if (!neutral_vec)
|
||||
{
|
||||
gimple_seq ctor_seq = NULL;
|
||||
neutral_vec = gimple_build_vector_from_val
|
||||
(&ctor_seq, vector_type, neutral_op);
|
||||
if (ctor_seq != NULL)
|
||||
gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
|
||||
}
|
||||
vec_oprnds->quick_push (neutral_vec);
|
||||
}
|
||||
/* Get the def before the loop. In reduction chain we have only
|
||||
one initial value. Else we have as many as PHIs in the group. */
|
||||
if (reduc_chain)
|
||||
op = j != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
|
||||
else if (((vec_oprnds->length () + 1) * nunits
|
||||
- number_of_places_left_in_vector >= group_size)
|
||||
&& neutral_op)
|
||||
op = neutral_op;
|
||||
else
|
||||
{
|
||||
for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
|
||||
vec_oprnds->quick_push (vop);
|
||||
}
|
||||
op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
|
||||
|
||||
/* Create 'vect_ = {op0,op1,...,opn}'. */
|
||||
number_of_places_left_in_vector--;
|
||||
elts[nunits - number_of_places_left_in_vector - 1] = op;
|
||||
if (!CONSTANT_CLASS_P (op))
|
||||
constant_p = false;
|
||||
|
||||
if (number_of_places_left_in_vector == 0)
|
||||
{
|
||||
gimple_seq ctor_seq = NULL;
|
||||
tree init;
|
||||
if (constant_p && !neutral_op
|
||||
? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
|
||||
: known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
|
||||
/* Build the vector directly from ELTS. */
|
||||
init = gimple_build_vector (&ctor_seq, &elts);
|
||||
else if (neutral_op)
|
||||
{
|
||||
/* Build a vector of the neutral value and shift the
|
||||
other elements into place. */
|
||||
init = gimple_build_vector_from_val (&ctor_seq, vector_type,
|
||||
neutral_op);
|
||||
int k = nunits;
|
||||
while (k > 0 && elts[k - 1] == neutral_op)
|
||||
k -= 1;
|
||||
while (k > 0)
|
||||
{
|
||||
k -= 1;
|
||||
init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
|
||||
vector_type, init, elts[k]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First time round, duplicate ELTS to fill the
|
||||
required number of vectors, then cherry pick the
|
||||
appropriate result for each iteration. */
|
||||
if (vec_oprnds->is_empty ())
|
||||
duplicate_and_interleave (&ctor_seq, vector_type, elts,
|
||||
number_of_vectors,
|
||||
permute_results);
|
||||
init = permute_results[number_of_vectors - j - 1];
|
||||
}
|
||||
if (ctor_seq != NULL)
|
||||
gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
|
||||
vec_oprnds->quick_push (init);
|
||||
|
||||
number_of_places_left_in_vector = nunits;
|
||||
elts.new_vector (vector_type, nunits, 1);
|
||||
elts.quick_grow (nunits);
|
||||
constant_p = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user