re PR middle-end/42505 (loop canonicalization causes a lot of unnecessary temporary variables)

2010-07-10  Sandra Loosemore  <sandra@codesourcery.com>

	PR middle-end/42505

	gcc/
	* tree-inline.c (estimate_num_insns): Refactor builtin complexity
	lookup code into....
	* builtins.c (is_simple_builtin, is_inexpensive_builtin): ...these
	new functions.
	* tree.h (is_simple_builtin, is_inexpensive_builtin): Declare.
	* cfgloopanal.c (target_clobbered_regs): Define.
	(init_set_costs): Initialize target_clobbered_regs.
	(estimate_reg_pressure_cost): Add call_p argument.  When true,
	adjust the number of available registers to exclude the
	call-clobbered registers.
	* cfgloop.h (target_clobbered_regs): Declare.
	(estimate_reg_pressure_cost): Adjust declaration.
	* tree-ssa-loop-ivopts.c (struct ivopts_data): Add body_includes_call.
	(ivopts_global_cost_for_size): Pass it to estimate_reg_pressure_cost.
	(determine_set_costs): Dump target_clobbered_regs.
	(loop_body_includes_call): New function.
	(tree_ssa_iv_optimize_loop): Use it to initialize new field.
	* loop-invariant.c (gain_for_invariant): Adjust arguments to pass
	call_p flag through.
	(best_gain_for_invariant): Likewise.
	(find_invariants_to_move): Likewise.
	(move_single_loop_invariants): Likewise, using already-computed
	has_call field.

From-SVN: r162043
This commit is contained in:
Sandra Loosemore 2010-07-10 14:43:29 -04:00 committed by Sandra Loosemore
parent 35151cd5f8
commit bec922f040
8 changed files with 215 additions and 111 deletions

View File

@ -1,3 +1,30 @@
2010-07-10 Sandra Loosemore <sandra@codesourcery.com>
PR middle-end/42505
* tree-inline.c (estimate_num_insns): Refactor builtin complexity
lookup code into....
* builtins.c (is_simple_builtin, is_inexpensive_builtin): ...these
new functions.
* tree.h (is_simple_builtin, is_inexpensive_builtin): Declare.
* cfgloopanal.c (target_clobbered_regs): Define.
(init_set_costs): Initialize target_clobbered_regs.
(estimate_reg_pressure_cost): Add call_p argument. When true,
adjust the number of available registers to exclude the
call-clobbered registers.
* cfgloop.h (target_clobbered_regs): Declare.
(estimate_reg_pressure_cost): Adjust declaration.
* tree-ssa-loop-ivopts.c (struct ivopts_data): Add body_includes_call.
(ivopts_global_cost_for_size): Pass it to estimate_reg_pressure_cost.
(determine_set_costs): Dump target_clobbered_regs.
(loop_body_includes_call): New function.
(tree_ssa_iv_optimize_loop): Use it to initialize new field.
* loop-invariant.c (gain_for_invariant): Adjust arguments to pass
call_p flag through.
(best_gain_for_invariant): Likewise.
(find_invariants_to_move): Likewise.
(move_single_loop_invariants): Likewise, using already-computed
has_call field.
2010-07-10 Richard Guenther <rguenther@suse.de>
Joern Rennecke <joern.rennecke@embecosm.com>

View File

@ -13753,3 +13753,123 @@ set_builtin_user_assembler_name (tree decl, const char *asmspec)
break;
}
}
/* Return true if DECL is a builtin that expands to a constant or similarly
simple code. */
bool
is_simple_builtin (tree decl)
{
if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
switch (DECL_FUNCTION_CODE (decl))
{
/* Builtins that expand to constants. */
case BUILT_IN_CONSTANT_P:
case BUILT_IN_EXPECT:
case BUILT_IN_OBJECT_SIZE:
case BUILT_IN_UNREACHABLE:
/* Simple register moves or loads from stack. */
case BUILT_IN_RETURN_ADDRESS:
case BUILT_IN_EXTRACT_RETURN_ADDR:
case BUILT_IN_FROB_RETURN_ADDR:
case BUILT_IN_RETURN:
case BUILT_IN_AGGREGATE_INCOMING_ADDRESS:
case BUILT_IN_FRAME_ADDRESS:
case BUILT_IN_VA_END:
case BUILT_IN_STACK_SAVE:
case BUILT_IN_STACK_RESTORE:
/* Exception state returns or moves registers around. */
case BUILT_IN_EH_FILTER:
case BUILT_IN_EH_POINTER:
case BUILT_IN_EH_COPY_VALUES:
return true;
default:
return false;
}
return false;
}
/* Return true if DECL is a builtin that is not expensive, i.e., they are
most probably expanded inline into reasonably simple code. This is a
superset of is_simple_builtin. */
bool
is_inexpensive_builtin (tree decl)
{
if (!decl)
return false;
else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD)
return true;
else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
switch (DECL_FUNCTION_CODE (decl))
{
case BUILT_IN_ABS:
case BUILT_IN_ALLOCA:
case BUILT_IN_BSWAP32:
case BUILT_IN_BSWAP64:
case BUILT_IN_CLZ:
case BUILT_IN_CLZIMAX:
case BUILT_IN_CLZL:
case BUILT_IN_CLZLL:
case BUILT_IN_CTZ:
case BUILT_IN_CTZIMAX:
case BUILT_IN_CTZL:
case BUILT_IN_CTZLL:
case BUILT_IN_FFS:
case BUILT_IN_FFSIMAX:
case BUILT_IN_FFSL:
case BUILT_IN_FFSLL:
case BUILT_IN_IMAXABS:
case BUILT_IN_FINITE:
case BUILT_IN_FINITEF:
case BUILT_IN_FINITEL:
case BUILT_IN_FINITED32:
case BUILT_IN_FINITED64:
case BUILT_IN_FINITED128:
case BUILT_IN_FPCLASSIFY:
case BUILT_IN_ISFINITE:
case BUILT_IN_ISINF_SIGN:
case BUILT_IN_ISINF:
case BUILT_IN_ISINFF:
case BUILT_IN_ISINFL:
case BUILT_IN_ISINFD32:
case BUILT_IN_ISINFD64:
case BUILT_IN_ISINFD128:
case BUILT_IN_ISNAN:
case BUILT_IN_ISNANF:
case BUILT_IN_ISNANL:
case BUILT_IN_ISNAND32:
case BUILT_IN_ISNAND64:
case BUILT_IN_ISNAND128:
case BUILT_IN_ISNORMAL:
case BUILT_IN_ISGREATER:
case BUILT_IN_ISGREATEREQUAL:
case BUILT_IN_ISLESS:
case BUILT_IN_ISLESSEQUAL:
case BUILT_IN_ISLESSGREATER:
case BUILT_IN_ISUNORDERED:
case BUILT_IN_VA_ARG_PACK:
case BUILT_IN_VA_ARG_PACK_LEN:
case BUILT_IN_VA_COPY:
case BUILT_IN_TRAP:
case BUILT_IN_SAVEREGS:
case BUILT_IN_POPCOUNTL:
case BUILT_IN_POPCOUNTLL:
case BUILT_IN_POPCOUNTIMAX:
case BUILT_IN_POPCOUNT:
case BUILT_IN_PARITYL:
case BUILT_IN_PARITYLL:
case BUILT_IN_PARITYIMAX:
case BUILT_IN_PARITY:
case BUILT_IN_LABS:
case BUILT_IN_LLABS:
case BUILT_IN_PREFETCH:
return true;
default:
return is_simple_builtin (decl);
}
return false;
}

View File

@ -627,13 +627,14 @@ fel_init (loop_iterator *li, loop_p *loop, unsigned flags)
/* The properties of the target. */
extern unsigned target_avail_regs;
extern unsigned target_clobbered_regs;
extern unsigned target_res_regs;
extern unsigned target_reg_cost [2];
extern unsigned target_spill_cost [2];
/* Register pressure estimation for induction variable optimizations & loop
invariant motion. */
extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool);
extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool, bool);
extern void init_set_costs (void);
/* Loop optimizer initialization. */

View File

@ -320,6 +320,8 @@ seq_cost (const_rtx seq, bool speed)
/* The properties of the target. */
unsigned target_avail_regs; /* Number of available registers. */
unsigned target_clobbered_regs; /* Number of available registers that are
call-clobbered. */
unsigned target_res_regs; /* Number of registers reserved for temporary
expressions. */
unsigned target_reg_cost[2]; /* The cost for register when there still
@ -342,10 +344,15 @@ init_set_costs (void)
unsigned i;
target_avail_regs = 0;
target_clobbered_regs = 0;
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
&& !fixed_regs[i])
target_avail_regs++;
{
target_avail_regs++;
if (call_used_regs[i])
target_clobbered_regs++;
}
target_res_regs = 3;
@ -379,20 +386,29 @@ init_set_costs (void)
/* Estimates cost of increased register pressure caused by making N_NEW new
registers live around the loop. N_OLD is the number of registers live
around the loop. */
around the loop. If CALL_P is true, also take into account that
call-used registers may be clobbered in the loop body, reducing the
number of available registers before we spill. */
unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed)
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
bool call_p)
{
unsigned cost;
unsigned regs_needed = n_new + n_old;
unsigned available_regs = target_avail_regs;
/* If there is a call in the loop body, the call-clobbered registers
are not available for loop invariants. */
if (call_p)
available_regs = available_regs - target_clobbered_regs;
/* If we have enough registers, we should use them and not restrict
the transformations unnecessarily. */
if (regs_needed + target_res_regs <= target_avail_regs)
if (regs_needed + target_res_regs <= available_regs)
return 0;
if (regs_needed <= target_avail_regs)
if (regs_needed <= available_regs)
/* If we are close to running out of registers, try to preserve
them. */
cost = target_reg_cost [speed] * n_new;

View File

@ -1173,11 +1173,13 @@ get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed)
/* Calculates gain for eliminating invariant INV. REGS_USED is the number
of registers used in the loop, NEW_REGS is the number of new variables
already added due to the invariant motion. The number of registers needed
for it is stored in *REGS_NEEDED. */
for it is stored in *REGS_NEEDED. SPEED and CALL_P are flags passed
through to estimate_reg_pressure_cost. */
static int
gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
unsigned *new_regs, unsigned regs_used, bool speed)
unsigned *new_regs, unsigned regs_used,
bool speed, bool call_p)
{
int comp_cost, size_cost;
@ -1188,9 +1190,9 @@ gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
if (! flag_ira_loop_pressure)
{
size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0],
regs_used, speed)
regs_used, speed, call_p)
- estimate_reg_pressure_cost (new_regs[0],
regs_used, speed));
regs_used, speed, call_p));
}
else
{
@ -1245,7 +1247,8 @@ gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
static int
best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
unsigned *new_regs, unsigned regs_used, bool speed)
unsigned *new_regs, unsigned regs_used,
bool speed, bool call_p)
{
struct invariant *inv;
int i, gain = 0, again;
@ -1261,7 +1264,7 @@ best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
continue;
again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used,
speed);
speed, call_p);
if (again > gain)
{
gain = again;
@ -1314,7 +1317,7 @@ set_move_mark (unsigned invno, int gain)
/* Determines which invariants to move. */
static void
find_invariants_to_move (bool speed)
find_invariants_to_move (bool speed, bool call_p)
{
int gain;
unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES];
@ -1353,7 +1356,8 @@ find_invariants_to_move (bool speed)
new_regs[ira_reg_class_cover[i]] = 0;
}
while ((gain = best_gain_for_invariant (&inv, regs_needed,
new_regs, regs_used, speed)) > 0)
new_regs, regs_used,
speed, call_p)) > 0)
{
set_move_mark (inv->invno, gain);
if (! flag_ira_loop_pressure)
@ -1573,7 +1577,8 @@ move_single_loop_invariants (struct loop *loop)
init_inv_motion_data ();
find_invariants (loop);
find_invariants_to_move (optimize_loop_for_speed_p (loop));
find_invariants_to_move (optimize_loop_for_speed_p (loop),
LOOP_DATA (loop)->has_call);
move_invariants (loop);
free_inv_motion_data ();

View File

@ -3478,105 +3478,13 @@ estimate_num_insns (gimple stmt, eni_weights *weights)
if (POINTER_TYPE_P (funtype))
funtype = TREE_TYPE (funtype);
if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD)
if (is_simple_builtin (decl))
return 0;
else if (is_inexpensive_builtin (decl))
cost = weights->target_builtin_call_cost;
else
cost = weights->call_cost;
if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
switch (DECL_FUNCTION_CODE (decl))
{
/* Builtins that expand to constants. */
case BUILT_IN_CONSTANT_P:
case BUILT_IN_EXPECT:
case BUILT_IN_OBJECT_SIZE:
case BUILT_IN_UNREACHABLE:
/* Simple register moves or loads from stack. */
case BUILT_IN_RETURN_ADDRESS:
case BUILT_IN_EXTRACT_RETURN_ADDR:
case BUILT_IN_FROB_RETURN_ADDR:
case BUILT_IN_RETURN:
case BUILT_IN_AGGREGATE_INCOMING_ADDRESS:
case BUILT_IN_FRAME_ADDRESS:
case BUILT_IN_VA_END:
case BUILT_IN_STACK_SAVE:
case BUILT_IN_STACK_RESTORE:
/* Exception state returns or moves registers around. */
case BUILT_IN_EH_FILTER:
case BUILT_IN_EH_POINTER:
case BUILT_IN_EH_COPY_VALUES:
return 0;
/* builtins that are not expensive (that is they are most probably
expanded inline into resonably simple code). */
case BUILT_IN_ABS:
case BUILT_IN_ALLOCA:
case BUILT_IN_BSWAP32:
case BUILT_IN_BSWAP64:
case BUILT_IN_CLZ:
case BUILT_IN_CLZIMAX:
case BUILT_IN_CLZL:
case BUILT_IN_CLZLL:
case BUILT_IN_CTZ:
case BUILT_IN_CTZIMAX:
case BUILT_IN_CTZL:
case BUILT_IN_CTZLL:
case BUILT_IN_FFS:
case BUILT_IN_FFSIMAX:
case BUILT_IN_FFSL:
case BUILT_IN_FFSLL:
case BUILT_IN_IMAXABS:
case BUILT_IN_FINITE:
case BUILT_IN_FINITEF:
case BUILT_IN_FINITEL:
case BUILT_IN_FINITED32:
case BUILT_IN_FINITED64:
case BUILT_IN_FINITED128:
case BUILT_IN_FPCLASSIFY:
case BUILT_IN_ISFINITE:
case BUILT_IN_ISINF_SIGN:
case BUILT_IN_ISINF:
case BUILT_IN_ISINFF:
case BUILT_IN_ISINFL:
case BUILT_IN_ISINFD32:
case BUILT_IN_ISINFD64:
case BUILT_IN_ISINFD128:
case BUILT_IN_ISNAN:
case BUILT_IN_ISNANF:
case BUILT_IN_ISNANL:
case BUILT_IN_ISNAND32:
case BUILT_IN_ISNAND64:
case BUILT_IN_ISNAND128:
case BUILT_IN_ISNORMAL:
case BUILT_IN_ISGREATER:
case BUILT_IN_ISGREATEREQUAL:
case BUILT_IN_ISLESS:
case BUILT_IN_ISLESSEQUAL:
case BUILT_IN_ISLESSGREATER:
case BUILT_IN_ISUNORDERED:
case BUILT_IN_VA_ARG_PACK:
case BUILT_IN_VA_ARG_PACK_LEN:
case BUILT_IN_VA_COPY:
case BUILT_IN_TRAP:
case BUILT_IN_SAVEREGS:
case BUILT_IN_POPCOUNTL:
case BUILT_IN_POPCOUNTLL:
case BUILT_IN_POPCOUNTIMAX:
case BUILT_IN_POPCOUNT:
case BUILT_IN_PARITYL:
case BUILT_IN_PARITYLL:
case BUILT_IN_PARITYIMAX:
case BUILT_IN_PARITY:
case BUILT_IN_LABS:
case BUILT_IN_LLABS:
case BUILT_IN_PREFETCH:
cost = weights->target_builtin_call_cost;
break;
default:
break;
}
if (decl)
funtype = TREE_TYPE (decl);

View File

@ -260,6 +260,9 @@ struct ivopts_data
/* Are we optimizing for speed? */
bool speed;
/* Whether the loop body includes any function calls. */
bool body_includes_call;
};
/* An assignment of iv candidates to uses. */
@ -4431,7 +4434,8 @@ ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
/* We add size to the cost, so that we prefer eliminating ivs
if possible. */
return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
data->body_includes_call);
}
/* For each size of the induction variable set determine the penalty. */
@ -4450,6 +4454,7 @@ determine_set_costs (struct ivopts_data *data)
{
fprintf (dump_file, "Global costs:\n");
fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
}
@ -5859,6 +5864,25 @@ tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Returns true if the loop body BODY includes any function calls. */
static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
gimple_stmt_iterator gsi;
unsigned i;
for (i = 0; i < num_nodes; i++)
for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
if (is_gimple_call (stmt)
&& !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
return true;
}
return false;
}
/* Optimizes the LOOP. Returns true if anything changed. */
static bool
@ -5890,6 +5914,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
}
body = get_loop_body (loop);
data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
free (body);

View File

@ -5046,6 +5046,8 @@ extern tree build_range_check (location_t, tree, tree, int, tree, tree);
extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
tree, tree);
extern void set_builtin_user_assembler_name (tree decl, const char *asmspec);
extern bool is_simple_builtin (tree);
extern bool is_inexpensive_builtin (tree);
/* In convert.c */
extern tree strip_float_extensions (tree);