loop-36.c: Reduce number of iterations to 2 so unrolling still happens.

* gcc.dg/tree-ssa/loop-36.c: Reduce number of iterations to 2 so unrolling still happens. * gcc.dg/ipa/ipacost-1.c: Prevent inlining. * gcc.dg/ipa/ipacost-2.c: Likewise. * gcc.dg/vect/slp-3.c: Loop is no longer unrolled. * tree-inline.c (estimate_operator_cost): Add operands; when division happens by constant, it is cheap. (estimate_num_insns): Loads and stores no longer have cost of 0; EH magic stuff is cheap; when computing runtime cost of switch, use base-2 logarithm of the number of its cases; builtin_expect has cost of 0; compute cost for moving return value of call. (init_inline_once): Initialize time_based flags. * tree-inline.h (eni_weights_d): Add time_based flag. From-SVN: r147436
2025-03-23 09:40:54 +08:00 · 2009-05-12 16:05:28 +02:00 · 2009-05-12 16:05:28 +02:00 · 02f0b13a1f
commit 02f0b13a1f
parent ecb7f6de29
8 changed files with 72 additions and 15 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,14 @@
+2009-05-12  Jan Hubicka  <jh@suse.cz>
+
+	* tree-inline.c (estimate_operator_cost): Add operands;
+	when division happens by constant, it is cheap.
+	(estimate_num_insns): Loads and stores no longer have cost of 0;
+	EH magic stuff is cheap; when computing runtime cost of switch, use
+	base-2 logarithm of the number of its cases; builtin_expect has cost of 0;
+	compute cost for moving return value of call.
+	(init_inline_once): Initialize time_based flags.
+	* tree-inline.h (eni_weights_d): Add time_based flag.
+
 2009-05-12  Paolo Bonzini  <bonzini@gnu.org>

 	* df-core.c: Update head documentation.
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,11 @@
+2009-05-12  Jan Hubicka  <jh@suse.cz>
+
+	* gcc.dg/tree-ssa/loop-36.c: Reduce number of iterations to 2 so unrolling
+	still happens.
+	* gcc.dg/ipa/ipacost-1.c: Prevent inlining.
+	* gcc.dg/ipa/ipacost-2.c: Likewise.
+	* gcc.dg/vect/slp-3.c: Loop is no longer unrolled.
+
 2009-05-12  David Billinghurst <billingd@gcc.gnu.org>

 	* gfortran.dg/default_format_1.f90: XFAIL on cygwin. 
--- a/gcc/testsuite/gcc.dg/ipa/ipacost-1.c
+++ b/gcc/testsuite/gcc.dg/ipa/ipacost-1.c
@ -46,6 +46,8 @@ i_can_not_be_propagated_fully2 (int *a)
 main()
 {
  i_can_be_propagated_fully2 (array);
+  i_can_be_propagated_fully2 (array);
+  i_can_not_be_propagated_fully2 (array);
  i_can_not_be_propagated_fully2 (array);
 }

--- a/gcc/testsuite/gcc.dg/ipa/ipacost-2.c
+++ b/gcc/testsuite/gcc.dg/ipa/ipacost-2.c
@ -47,6 +47,8 @@ i_can_not_be_propagated_fully2 (int *a)
 main()
 {
  i_can_be_propagated_fully2 (array);
+  i_can_be_propagated_fully2 (array);
+  i_can_not_be_propagated_fully2 (array);
  i_can_not_be_propagated_fully2 (array);
 }

@ -54,7 +56,7 @@ main()
 /* { dg-final { scan-ipa-dump-times "versioned function i_can_be_propagated_fully " 1 "cp"  } } */
 /* { dg-final { scan-ipa-dump-times "versioned function i_can_not_be_propagated_fully2" 1 "cp"  } } */
 /* { dg-final { scan-ipa-dump-times "versioned function i_can_not_be_propagated_fully " 1 "cp"  } } */
-/* { dg-final { scan-tree-dump-not "i_can_be_propagated" "optimized"  } } */
-/* { dg-final { scan-tree-dump-not "i_can_be_propagated" "optimized"  } } */
+/* { dg-final { scan-tree-dump-not "i_can_be_propagated_fully \\(" "optimized"  } } */
+/* { dg-final { scan-tree-dump-not "i_can_be_propagated_fully2 \\(" "optimized"  } } */
 /* { dg-final { cleanup-ipa-dump "cp" } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-36.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-36.c
@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-dce2" } */

-struct X { float array[4]; };
+struct X { float array[2]; };

 struct X a,b;

@ -9,9 +9,9 @@ float foobar () {
  float s = 0;
  unsigned int d;
  struct X c;
-  for (d=0; d<4; ++d)
+  for (d=0; d<2; ++d)
    c.array[d] = a.array[d] * b.array[d];
-  for (d=0; d<4; ++d)
+  for (d=0; d<2; ++d)
    s+=c.array[d];
  return s;
 }
--- a/gcc/testsuite/gcc.dg/vect/slp-3.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-3.c
@ -142,8 +142,7 @@ int main (void)
  return 0;
 }

-/* One of the loops gets complettely unrolled.  */
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_no_align } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
  
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@ -2783,7 +2783,8 @@ estimate_move_cost (tree type)
 /* Returns cost of operation CODE, according to WEIGHTS  */

 static int
-estimate_operator_cost (enum tree_code code, eni_weights *weights)
+estimate_operator_cost (enum tree_code code, eni_weights *weights,
+			tree op1 ATTRIBUTE_UNUSED, tree op2)
 {
  switch (code)
    {
@ -2893,7 +2894,9 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights)
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case RDIV_EXPR:
-      return weights->div_mod_cost;
+      if (TREE_CODE (op2) != INTEGER_CST)
+        return weights->div_mod_cost;
+      return 1;

    default:
      /* We expect a copy assignment with no operator.  */
@ -2930,6 +2933,7 @@ estimate_num_insns (gimple stmt, eni_weights *weights)
  unsigned cost, i;
  enum gimple_code code = gimple_code (stmt);
  tree lhs;
+  tree rhs;

  switch (code)
    {
@ -2953,16 +2957,35 @@ estimate_num_insns (gimple stmt, eni_weights *weights)
 	 of moving something into "a", which we compute using the function
 	 estimate_move_cost.  */
      lhs = gimple_assign_lhs (stmt);
+      rhs = gimple_assign_rhs1 (stmt);
+
+      /* EH magic stuff is most probably going to be optimized out.
+         We rarely really need to save EH info for unwinding
+         nested exceptions.  */
+      if (TREE_CODE (lhs) == FILTER_EXPR
+	  || TREE_CODE (lhs) == EXC_PTR_EXPR
+          || TREE_CODE (rhs) == FILTER_EXPR
+	  || TREE_CODE (rhs) == EXC_PTR_EXPR)
+	return 0;
      if (is_gimple_reg (lhs))
 	cost = 0;
      else
 	cost = estimate_move_cost (TREE_TYPE (lhs));

-      cost += estimate_operator_cost (gimple_assign_rhs_code (stmt), weights);
+      if (!is_gimple_reg (rhs) && !is_gimple_min_invariant (rhs))
+	cost += estimate_move_cost (TREE_TYPE (rhs));
+
+      cost += estimate_operator_cost (gimple_assign_rhs_code (stmt), weights,
+      				      gimple_assign_rhs1 (stmt),
+				      get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
+				      == GIMPLE_BINARY_RHS
+				      ? gimple_assign_rhs2 (stmt) : NULL);
      break;

    case GIMPLE_COND:
-      cost = 1 + estimate_operator_cost (gimple_cond_code (stmt), weights);
+      cost = 1 + estimate_operator_cost (gimple_cond_code (stmt), weights,
+      				         gimple_op (stmt, 0),
+				         gimple_op (stmt, 1));
      break;

    case GIMPLE_SWITCH:
@ -2971,7 +2994,10 @@ estimate_num_insns (gimple stmt, eni_weights *weights)

 	 TODO: once the switch expansion logic is sufficiently separated, we can
 	 do better job on estimating cost of the switch.  */
-      cost = gimple_switch_num_labels (stmt) * 2;
+      if (weights->time_based)
+        cost = floor_log2 (gimple_switch_num_labels (stmt)) * 2;
+      else
+        cost = gimple_switch_num_labels (stmt) * 2;
      break;

    case GIMPLE_CALL:
@ -2994,8 +3020,7 @@ estimate_num_insns (gimple stmt, eni_weights *weights)
 	    case BUILT_IN_CONSTANT_P:
 	      return 0;
 	    case BUILT_IN_EXPECT:
-	      cost = 0;
-	      break;
+	      return 0;

 	    /* Prefetch instruction is not expensive.  */
 	    case BUILT_IN_PREFETCH:
@ -3009,6 +3034,8 @@ estimate_num_insns (gimple stmt, eni_weights *weights)
 	if (decl)
 	  funtype = TREE_TYPE (decl);

+	if (!VOID_TYPE_P (TREE_TYPE (funtype)))
+	  cost += estimate_move_cost (TREE_TYPE (funtype));
 	/* Our cost must be kept in sync with
 	   cgraph_estimate_size_after_inlining that does use function
 	   declaration to figure out the arguments.  */
@ -3133,11 +3160,13 @@ init_inline_once (void)
  eni_inlining_weights.target_builtin_call_cost = 1;
  eni_inlining_weights.div_mod_cost = 10;
  eni_inlining_weights.omp_cost = 40;
+  eni_inlining_weights.time_based = true;

  eni_size_weights.call_cost = 1;
  eni_size_weights.target_builtin_call_cost = 1;
  eni_size_weights.div_mod_cost = 1;
  eni_size_weights.omp_cost = 40;
+  eni_size_weights.time_based = false;

  /* Estimating time for call is difficult, since we have no idea what the
     called function does.  In the current uses of eni_time_weights,
@ -3147,6 +3176,7 @@ init_inline_once (void)
  eni_time_weights.target_builtin_call_cost = 10;
  eni_time_weights.div_mod_cost = 10;
  eni_time_weights.omp_cost = 40;
+  eni_time_weights.time_based = true;
 }

 /* Estimate the number of instructions in a gimple_seq. */
--- a/gcc/tree-inline.h
+++ b/gcc/tree-inline.h
@ -130,6 +130,11 @@ typedef struct eni_weights_d

  /* Cost for omp construct.  */
  unsigned omp_cost;
+
+  /* True when the time of a statement should be estimated.  Thus, e.g.,
+     the cost of a switch statement is logarithmic rather than linear in
+     the number of cases.  */
+  bool time_based;
 } eni_weights;

 /* Weights that estimate_num_insns uses for heuristics in inlining.  */