mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-23 09:40:54 +08:00
loop-36.c: Reduce amount of iterations to 2 so unrolling still happens.
* gcc.dg/tree-ssa/loop-36.c: Reduce amount of iterations to 2 so unrolling still happens. * gcc.dg/ipa/ipacost-1.c: Prevent inlining * gcc.dg/ipa/ipacost-2.c: Likewise. * gcc.dg/vect/slp-3.c: Loop is no longer unrolled. * tree-inline.c (estimate_operator_cost): Add operands; when division happens by constant, it is cheap. (estimate_num_insns): Loads and stores are not having cost of 0; EH magic stuff is cheap; when computing runtime cost of switch, use log2 base of amount of its cases; builtin_expect has cost of 0; compute cost for moving return value of call. (init_inline_once): Initialize time_based flags. * tree-inline.h (eni_weights_d): Add time_based flag. From-SVN: r147436
This commit is contained in:
parent
ecb7f6de29
commit
02f0b13a1f
@ -1,3 +1,14 @@
|
||||
2009-05-12 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* tree-inline.c (estimate_operator_cost): Add operands;
|
||||
when division happens by constant, it is cheap.
|
||||
(estimate_num_insns): Loads and stores are not having cost of 0;
|
||||
EH magic stuff is cheap; when computing runtime cost of switch,
|
||||
use log2 base of amount of its cases; builtin_expect has cost of 0;
|
||||
compute cost for moving return value of call.
|
||||
(init_inline_once): Initialize time_based flags.
|
||||
* tree-inline.h (eni_weights_d): Add time_based flag.
|
||||
|
||||
2009-05-12 Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
* df-core.c: Update head documentation.
|
||||
|
@ -1,3 +1,11 @@
|
||||
2009-05-12 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* gcc.dg/tree-ssa/loop-36.c: Reduce amount of iterations to 2 so unrolling
|
||||
still happens.
|
||||
* gcc.dg/ipa/ipacost-1.c: Prevent inlining
|
||||
* gcc.dg/ipa/ipacost-2.c: Likewise.
|
||||
* gcc.dg/vect/slp-3.c: Loop is no longer unrolled.
|
||||
|
||||
2009-05-12 David Billinghurst <billingd@gcc.gnu.org>
|
||||
|
||||
* gfortran.dg/default_format_1.f90: XFAIL on cygwin.
|
||||
|
@ -46,6 +46,8 @@ i_can_not_be_propagated_fully2 (int *a)
|
||||
main()
|
||||
{
|
||||
i_can_be_propagated_fully2 (array);
|
||||
i_can_be_propagated_fully2 (array);
|
||||
i_can_not_be_propagated_fully2 (array);
|
||||
i_can_not_be_propagated_fully2 (array);
|
||||
}
|
||||
|
||||
|
@ -47,6 +47,8 @@ i_can_not_be_propagated_fully2 (int *a)
|
||||
main()
|
||||
{
|
||||
i_can_be_propagated_fully2 (array);
|
||||
i_can_be_propagated_fully2 (array);
|
||||
i_can_not_be_propagated_fully2 (array);
|
||||
i_can_not_be_propagated_fully2 (array);
|
||||
}
|
||||
|
||||
@ -54,7 +56,7 @@ main()
|
||||
/* { dg-final { scan-ipa-dump-times "versioned function i_can_be_propagated_fully " 1 "cp" } } */
|
||||
/* { dg-final { scan-ipa-dump-times "versioned function i_can_not_be_propagated_fully2" 1 "cp" } } */
|
||||
/* { dg-final { scan-ipa-dump-times "versioned function i_can_not_be_propagated_fully " 1 "cp" } } */
|
||||
/* { dg-final { scan-tree-dump-not "i_can_be_propagated" "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-not "i_can_be_propagated" "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-not "i_can_be_propagated_fully \\(" "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-not "i_can_be_propagated_fully2 \\(" "optimized" } } */
|
||||
/* { dg-final { cleanup-ipa-dump "cp" } } */
|
||||
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
||||
|
@ -1,7 +1,7 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-dce2" } */
|
||||
|
||||
struct X { float array[4]; };
|
||||
struct X { float array[2]; };
|
||||
|
||||
struct X a,b;
|
||||
|
||||
@ -9,9 +9,9 @@ float foobar () {
|
||||
float s = 0;
|
||||
unsigned int d;
|
||||
struct X c;
|
||||
for (d=0; d<4; ++d)
|
||||
for (d=0; d<2; ++d)
|
||||
c.array[d] = a.array[d] * b.array[d];
|
||||
for (d=0; d<4; ++d)
|
||||
for (d=0; d<2; ++d)
|
||||
s+=c.array[d];
|
||||
return s;
|
||||
}
|
||||
|
@ -142,8 +142,7 @@ int main (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* One of the loops gets completely unrolled. */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
@ -2783,7 +2783,8 @@ estimate_move_cost (tree type)
|
||||
/* Returns cost of operation CODE, according to WEIGHTS */
|
||||
|
||||
static int
|
||||
estimate_operator_cost (enum tree_code code, eni_weights *weights)
|
||||
estimate_operator_cost (enum tree_code code, eni_weights *weights,
|
||||
tree op1 ATTRIBUTE_UNUSED, tree op2)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
@ -2893,7 +2894,9 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights)
|
||||
case FLOOR_MOD_EXPR:
|
||||
case ROUND_MOD_EXPR:
|
||||
case RDIV_EXPR:
|
||||
return weights->div_mod_cost;
|
||||
if (TREE_CODE (op2) != INTEGER_CST)
|
||||
return weights->div_mod_cost;
|
||||
return 1;
|
||||
|
||||
default:
|
||||
/* We expect a copy assignment with no operator. */
|
||||
@ -2930,6 +2933,7 @@ estimate_num_insns (gimple stmt, eni_weights *weights)
|
||||
unsigned cost, i;
|
||||
enum gimple_code code = gimple_code (stmt);
|
||||
tree lhs;
|
||||
tree rhs;
|
||||
|
||||
switch (code)
|
||||
{
|
||||
@ -2953,16 +2957,35 @@ estimate_num_insns (gimple stmt, eni_weights *weights)
|
||||
of moving something into "a", which we compute using the function
|
||||
estimate_move_cost. */
|
||||
lhs = gimple_assign_lhs (stmt);
|
||||
rhs = gimple_assign_rhs1 (stmt);
|
||||
|
||||
/* EH magic stuff is most probably going to be optimized out.
|
||||
We rarely really need to save EH info for unwinding
|
||||
nested exceptions. */
|
||||
if (TREE_CODE (lhs) == FILTER_EXPR
|
||||
|| TREE_CODE (lhs) == EXC_PTR_EXPR
|
||||
|| TREE_CODE (rhs) == FILTER_EXPR
|
||||
|| TREE_CODE (rhs) == EXC_PTR_EXPR)
|
||||
return 0;
|
||||
if (is_gimple_reg (lhs))
|
||||
cost = 0;
|
||||
else
|
||||
cost = estimate_move_cost (TREE_TYPE (lhs));
|
||||
|
||||
cost += estimate_operator_cost (gimple_assign_rhs_code (stmt), weights);
|
||||
if (!is_gimple_reg (rhs) && !is_gimple_min_invariant (rhs))
|
||||
cost += estimate_move_cost (TREE_TYPE (rhs));
|
||||
|
||||
cost += estimate_operator_cost (gimple_assign_rhs_code (stmt), weights,
|
||||
gimple_assign_rhs1 (stmt),
|
||||
get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
|
||||
== GIMPLE_BINARY_RHS
|
||||
? gimple_assign_rhs2 (stmt) : NULL);
|
||||
break;
|
||||
|
||||
case GIMPLE_COND:
|
||||
cost = 1 + estimate_operator_cost (gimple_cond_code (stmt), weights);
|
||||
cost = 1 + estimate_operator_cost (gimple_cond_code (stmt), weights,
|
||||
gimple_op (stmt, 0),
|
||||
gimple_op (stmt, 1));
|
||||
break;
|
||||
|
||||
case GIMPLE_SWITCH:
|
||||
@ -2971,7 +2994,10 @@ estimate_num_insns (gimple stmt, eni_weights *weights)
|
||||
|
||||
TODO: once the switch expansion logic is sufficiently separated, we can
|
||||
do a better job of estimating the cost of the switch. */
|
||||
cost = gimple_switch_num_labels (stmt) * 2;
|
||||
if (weights->time_based)
|
||||
cost = floor_log2 (gimple_switch_num_labels (stmt)) * 2;
|
||||
else
|
||||
cost = gimple_switch_num_labels (stmt) * 2;
|
||||
break;
|
||||
|
||||
case GIMPLE_CALL:
|
||||
@ -2994,8 +3020,7 @@ estimate_num_insns (gimple stmt, eni_weights *weights)
|
||||
case BUILT_IN_CONSTANT_P:
|
||||
return 0;
|
||||
case BUILT_IN_EXPECT:
|
||||
cost = 0;
|
||||
break;
|
||||
return 0;
|
||||
|
||||
/* Prefetch instruction is not expensive. */
|
||||
case BUILT_IN_PREFETCH:
|
||||
@ -3009,6 +3034,8 @@ estimate_num_insns (gimple stmt, eni_weights *weights)
|
||||
if (decl)
|
||||
funtype = TREE_TYPE (decl);
|
||||
|
||||
if (!VOID_TYPE_P (TREE_TYPE (funtype)))
|
||||
cost += estimate_move_cost (TREE_TYPE (funtype));
|
||||
/* Our cost must be kept in sync with
|
||||
cgraph_estimate_size_after_inlining that does use function
|
||||
declaration to figure out the arguments. */
|
||||
@ -3133,11 +3160,13 @@ init_inline_once (void)
|
||||
eni_inlining_weights.target_builtin_call_cost = 1;
|
||||
eni_inlining_weights.div_mod_cost = 10;
|
||||
eni_inlining_weights.omp_cost = 40;
|
||||
eni_inlining_weights.time_based = true;
|
||||
|
||||
eni_size_weights.call_cost = 1;
|
||||
eni_size_weights.target_builtin_call_cost = 1;
|
||||
eni_size_weights.div_mod_cost = 1;
|
||||
eni_size_weights.omp_cost = 40;
|
||||
eni_size_weights.time_based = false;
|
||||
|
||||
/* Estimating time for call is difficult, since we have no idea what the
|
||||
called function does. In the current uses of eni_time_weights,
|
||||
@ -3147,6 +3176,7 @@ init_inline_once (void)
|
||||
eni_time_weights.target_builtin_call_cost = 10;
|
||||
eni_time_weights.div_mod_cost = 10;
|
||||
eni_time_weights.omp_cost = 40;
|
||||
eni_time_weights.time_based = true;
|
||||
}
|
||||
|
||||
/* Estimate the number of instructions in a gimple_seq. */
|
||||
|
@ -130,6 +130,11 @@ typedef struct eni_weights_d
|
||||
|
||||
/* Cost for omp construct. */
|
||||
unsigned omp_cost;
|
||||
|
||||
/* True when time of statement should be estimated. Thus, e.g., the
|
||||
cost of switch statement is logarithmic rather than linear in number
|
||||
of cases. */
|
||||
bool time_based;
|
||||
} eni_weights;
|
||||
|
||||
/* Weights that estimate_num_insns uses for heuristics in inlining. */
|
||||
|
Loading…
x
Reference in New Issue
Block a user