re PR target/64844 (Vectorization inhibited in gcc5 when loop starts with elem[1], aarch64 perf regression from 4.9.1)

2015-01-29  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/64844
	* tree-vect-loop.c (vect_estimate_min_profitable_iters): Always
	dump cost model analysis.
	* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
	Do not register adjusted load/store costs here.

	* gcc.dg/vect/pr64844.c: New testcase.

From-SVN: r220244
This commit is contained in:
Richard Biener 2015-01-29 12:53:39 +00:00 committed by Richard Biener
parent 85d44192f6
commit 62c004451a
5 changed files with 93 additions and 41 deletions

View File

@ -1,3 +1,11 @@
2015-01-29 Richard Biener <rguenther@suse.de>
PR tree-optimization/64844
* tree-vect-loop.c (vect_estimate_min_profitable_iters): Always
dump cost model analysis.
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
Do not register adjusted load/store costs here.
2015-01-29 Ilya Enkovich <ilya.enkovich@intel.com>
Uros Bizjak <ubizjak@gmail.com>

View File

@ -1,3 +1,8 @@
2015-01-29 Richard Biener <rguenther@suse.de>
PR tree-optimization/64844
* gcc.dg/vect/pr64844.c: New testcase.
2015-01-29 Yuri Rumyantsev <ysrumyan@gmail.com>
PR middle-end/64809

View File

@ -0,0 +1,52 @@
/* { dg-do run } */
/* { dg-require-effective-target vect_double } */
/* { dg-additional-options "-ffast-math" } */
#include "tree-vect.h"
extern void abort (void);
typedef __SIZE_TYPE__ size_t;
/* Return the sum of a[k] + b[k] over the first N elements of A and B.
   Each pair is added together first and then accumulated, in index
   order, starting from 0.0.  Returns 0.0 when N is zero.  */
static double
compute(size_t n, double const * __restrict a, double const * __restrict b)
{
  double sum = 0.0;
  size_t k = 0;

  while (k < n)
    {
      /* Form the per-element pair sum before folding it into the total.  */
      double const pair = a[k] + b[k];
      sum += pair;
      ++k;
    }

  return sum;
}
void init(double *, double *);
/* Test driver: fill two 1024-element arrays with deterministic values,
   then call compute twice over 512 elements -- once starting at
   element 0 and once starting at element 1 -- and abort if either
   result, truncated to int, differs from its expected constant.
   Returns 0 on success.  */
int
main()
{
double ary1[1024];
double ary2[1024];
size_t i;
/* check_vect is provided by tree-vect.h.  NOTE(review): presumably it
   verifies at run time that the target can execute vector code --
   confirm against that header.  */
check_vect ();
// Initialize arrays: ary1[i] = 1/(i+1), ary2[i] = 1 + 1/(i+1)
for (i = 0; i < 1024; ++i)
{
ary1[i] = 1 / (double)(i + 1);
ary2[i] = 1 + 1 / (double) (i + 1);
/* Empty asm with a "memory" clobber.  NOTE(review): presumably here to
   keep this initialization loop from being vectorized, so that the
   "loop vectorized" count scanned for at the end of the file covers
   only the two compute loops -- confirm.  */
__asm__ volatile ("" : : : "memory");
}
// Compute two results using different starting elements
/* The second call starts at element 1 of each array, i.e. one double
   past the start used by the first call.  */
if ((int) compute (512, &ary1[0], &ary2[0]) != 525
|| (int) compute(512, &ary1[1], &ary2[1]) != 523)
abort ();
return 0;
}
/* All targets should allow vectorizing this by some means of
dealing with the known misalignment in loop 2. */
/* { dg-final { scan-tree-dump-times "loop vectorized" 2 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -1763,9 +1763,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
if (do_peeling)
{
stmt_info_for_cost *si;
void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
If the misalignment of DR_i is identical to that of dr0 then set
DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
@ -1791,20 +1788,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
dump_printf_loc (MSG_NOTE, vect_location,
"Peeling for alignment will be applied.\n");
}
/* We've delayed passing the inside-loop peeling costs to the
target cost model until we were sure peeling would happen.
Do so now. */
if (body_cost_vec.exists ())
{
FOR_EACH_VEC_ELT (body_cost_vec, i, si)
{
struct _stmt_vec_info *stmt_info
= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
(void) add_stmt_cost (data, si->count, si->kind, stmt_info,
si->misalign, vect_body);
}
body_cost_vec.release ();
}
/* The inside-loop cost will be accounted for in vectorizable_load
and vectorizable_store correctly with adjusted alignments.
Drop the body_cost_vec on the floor here. */
body_cost_vec.release ();
stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
gcc_assert (stat);

View File

@ -2990,6 +2990,27 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
vec_inside_cost);
dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
vec_prologue_cost);
dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
vec_epilogue_cost);
dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
scalar_single_iter_cost);
dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
scalar_outside_cost);
dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
vec_outside_cost);
dump_printf (MSG_NOTE, " prologue iterations: %d\n",
peel_iters_prologue);
dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
peel_iters_epilogue);
}
/* Calculate number of iterations required to make the vector version
profitable, relative to the loop bodies only. The following condition
must hold true:
@ -3037,30 +3058,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
return;
}
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
vec_inside_cost);
dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
vec_prologue_cost);
dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
vec_epilogue_cost);
dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
scalar_single_iter_cost);
dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
scalar_outside_cost);
dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
vec_outside_cost);
dump_printf (MSG_NOTE, " prologue iterations: %d\n",
peel_iters_prologue);
dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
peel_iters_epilogue);
dump_printf (MSG_NOTE,
" Calculated minimum iters for profitability: %d\n",
min_profitable_iters);
dump_printf (MSG_NOTE, "\n");
}
dump_printf (MSG_NOTE,
" Calculated minimum iters for profitability: %d\n",
min_profitable_iters);
min_profitable_iters =
min_profitable_iters < vf ? vf : min_profitable_iters;