re PR target/64844 (Vectorization inhibited in gcc5 when loop starts with elem[1], aarch64 perf regression from 4.9.1)

2015-01-29 Richard Biener <rguenther@suse.de> PR tree-optimization/64844 * tree-vect-loop.c (vect_estimate_min_profitable_iters): Always dump cost model analysis. * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not register adjusted load/store costs here. * gcc.dg/vect/pr64844.c: New testcase. From-SVN: r220244
2025-03-19 15:11:08 +08:00 · 2015-01-29 12:53:39 +00:00 · 2015-01-29 12:53:39 +00:00 · 62c004451a
commit 62c004451a
parent 85d44192f6
5 changed files with 93 additions and 41 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2015-01-29  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/64844
+	* tree-vect-loop.c (vect_estimate_min_profitable_iters): Always
+	dump cost model analysis.
+	* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
+	Do not register adjusted load/store costs here.
+
 2015-01-29  Ilya Enkovich  <ilya.enkovich@intel.com>
 	    Uros Bizjak  <ubizjak@gmail.com>

--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2015-01-29  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/64844
+	* gcc.dg/vect/pr64844.c: New testcase.
+
 2015-01-29  Yuri Rumyantsev  <ysrumyan@gmail.com>

 	PR middle-end/64809
--- a/gcc/testsuite/gcc.dg/vect/pr64844.c
+++ b/gcc/testsuite/gcc.dg/vect/pr64844.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-additional-options "-ffast-math" } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+typedef __SIZE_TYPE__ size_t;
+
+static double  /* Returns the sum of a[i] + b[i] for i in [0, n).  */
+compute(size_t n, double const * __restrict a, double const * __restrict b)
+{
+  double res = 0.0;  /* Running total; stays 0.0 when n is 0.  */
+  size_t i;
+  for (i = 0; i < n; ++i)
+    res += a[i] + b[i];  /* Reduction over both input arrays.  */
+  return res;
+}
+
+void init(double *, double *);
+
+int
+main()
+{
+  double ary1[1024];
+  double ary2[1024];
+  size_t i;
+
+  check_vect ();  /* Declared outside this file, presumably in tree-vect.h; confirm.  */
+
+  // Initialize arrays: ary1[i] = 1/(i+1) and ary2[i] = 1 + 1/(i+1)
+  for (i = 0; i < 1024; ++i)
+    {
+      ary1[i] = 1 / (double)(i + 1);
+      ary2[i] = 1 + 1 / (double) (i + 1);
+      __asm__ volatile ("" : : : "memory");  /* Empty asm with a memory clobber; presumably keeps this init loop from being vectorized so it does not count toward the dump scan -- confirm.  */
+    }
+
+  // Sum 512 elements starting at index 0 and again at index 1; abort unless the sums truncate to 525 and 523
+  if ((int) compute (512, &ary1[0], &ary2[0]) != 525
+      || (int) compute(512, &ary1[1], &ary2[1]) != 523)
+    abort ();
+
+  return 0;
+}
+
+/* All targets should allow vectorizing this by some means of
+   dealing with the known misalignment in loop 2.  */
+
+/* { dg-final { scan-tree-dump-times "loop vectorized" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1763,9 +1763,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)

      if (do_peeling)
        {
-	  stmt_info_for_cost *si;
-	  void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
-
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
@@ -1791,20 +1788,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Peeling for alignment will be applied.\n");
            }
-	  /* We've delayed passing the inside-loop peeling costs to the
-	     target cost model until we were sure peeling would happen.
-	     Do so now.  */
-	  if (body_cost_vec.exists ())
-	    {
-	      FOR_EACH_VEC_ELT (body_cost_vec, i, si)
-		{
-		  struct _stmt_vec_info *stmt_info
-		    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
-		  (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
-					si->misalign, vect_body);
-		}
-	      body_cost_vec.release ();
-	    }
+	  /* The inside-loop cost will be accounted for in vectorizable_load
+	     and vectorizable_store correctly with adjusted alignments.
+	     Drop the body_cost_vec on the floor here.  */
+	  body_cost_vec.release ();

 	  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
 	  gcc_assert (stat);
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2990,6 +2990,27 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,

  vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
  
+  if (dump_enabled_p ())
+    {
+      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
+      dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
+                   vec_inside_cost);
+      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
+                   vec_prologue_cost);
+      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
+                   vec_epilogue_cost);
+      dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
+                   scalar_single_iter_cost);
+      dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
+                   scalar_outside_cost);
+      dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
+                   vec_outside_cost);
+      dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
+                   peel_iters_prologue);
+      dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
+                   peel_iters_epilogue);
+    }
+
  /* Calculate number of iterations required to make the vector version
     profitable, relative to the loop bodies only.  The following condition
     must hold true:
@@ -3037,30 +3058,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
      return;
    }

-  if (dump_enabled_p ())
-    {
-      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
-      dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
-                   vec_inside_cost);
-      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
-                   vec_prologue_cost);
-      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
-                   vec_epilogue_cost);
-      dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
-                   scalar_single_iter_cost);
-      dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
-                   scalar_outside_cost);
-      dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
-                   vec_outside_cost);
-      dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
-                   peel_iters_prologue);
-      dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
-                   peel_iters_epilogue);
-      dump_printf (MSG_NOTE,
-                   "  Calculated minimum iters for profitability: %d\n",
-                   min_profitable_iters);
-      dump_printf (MSG_NOTE, "\n");
-    }
+  dump_printf (MSG_NOTE,
+	       "  Calculated minimum iters for profitability: %d\n",
+	       min_profitable_iters);

  min_profitable_iters =
 	min_profitable_iters < vf ? vf : min_profitable_iters;