re PR tree-optimization/65518 (gcc consumes all memory with -O3)

2015-03-23 Richard Biener <rguenther@suse.de> PR tree-optimization/65518 * tree-vect-stmts.c (vectorizable_load): Reject single-element interleaving cases we generate abysmal code for. * gcc.dg/vect/pr65518.c: New testcase. From-SVN: r221595
2025-03-21 17:21:03 +08:00 · 2015-03-23 14:59:57 +00:00 · 2015-03-23 14:59:57 +00:00 · d5f035ea52
commit d5f035ea52
parent d886f7cc33
4 changed files with 70 additions and 0 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2015-03-23  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/65518
+	* tree-vect-stmts.c (vectorizable_load): Reject single-element
+	interleaving cases we generate abysmal code for.
+
 2015-03-23  Richard Biener  <rguenther@suse.de>

 	PR tree-optimization/65494
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2015-03-23  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/65518
+	* gcc.dg/vect/pr65518.c: New testcase.
+
 2015-03-23  Richard Biener  <rguenther@suse.de>

 	PR tree-optimization/65494
--- a/gcc/testsuite/gcc.dg/vect/pr65518.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65518.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+extern void abort (void);
+
+typedef struct giga
+{
+  unsigned int g[16];
+} giga;
+
+unsigned long __attribute__((noinline,noclone))
+addfst(giga const *gptr, int num)
+{
+  unsigned int retval = 0;
+  int i;
+  for (i = 0; i < num; i++)
+    retval += gptr[i].g[0];
+  return retval;
+}
+
+int main ()
+{
+  struct giga g[8];
+  unsigned int n = 1;
+  int i, j;
+  for (i = 0; i < 8; ++i)
+    for (j = 0; j < 16; ++j)
+      {
+	g[i].g[j] = n++;
+	__asm__ volatile ("");
+      }
+  if (addfst (g, 8) != 456)
+    abort ();
+  return 0;
+}
+
+/* We don't want to vectorize the single-element interleaving in the way
+   we currently do that (without skipping the unneeded vectors in the
+   gap between gptr[0].g[0] and gptr[1].g[0]), because that's very
+   sub-optimal and causes memory explosion (even though the cost model
+   should reject that in the end).  */
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops in function" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5780,6 +5780,22 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
+
+      /* If this is single-element interleaving with an element distance
+         that leaves unused vector loads around, punt - we at least create
+	 very sub-optimal code in that case (and blow up memory,
+	 see PR65518).  */
+      if (first_stmt == stmt
+	  && !GROUP_NEXT_ELEMENT (stmt_info)
+	  && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "single-element interleaving not supported "
+			     "for not adjacent vector loads\n");
+	  return false;
+	}
+
      if (!slp && !PURE_SLP_STMT (stmt_info))
 	{
 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));