mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-17 18:00:55 +08:00
loop-unswitch.c (unswitch_loop): Pass probabilities to loopify.
* loop-unswitch.c (unswitch_loop): Pass probabilities to loopify. * tree-ssa-loop-unswitch.c (tree_unswitch_loop): Pass probabilities to loop_version. * cfgloopmanip.c (scale_loop_frequencies): Export. (loopify): Scale the frequencies by prescribed coefficients. (set_zero_probability): New function. (duplicate_loop_to_header_edge): Improve updating of frequencies. (lv_adjust_loop_entry_edge, loop_version): Set probabilities and frequencies according to arguments. * tree-ssa-loop-manip.c (tree_unroll_loop): Set probabilities correctly. * cfg.c (scale_bbs_frequencies_int): Allow scaling the frequencies up. * modulo-sched.c (sms_schedule): Set probabilities for entering versioned loop correctly. * tree-vect-transform.c (vect_transform_loop): Ditto. * cfgloop.h (loopify, loop_version): Declaration changed. (scale_loop_frequencies): Declared. * gcc.dg/tree-ssa/update-unroll-1.c: New test. * gcc.dg/tree-ssa/update-unswitch-1.c: New test. From-SVN: r120378
This commit is contained in:
parent
7a388ee453
commit
03cb2019ff
@ -1,3 +1,23 @@
|
||||
2006-01-03 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
* loop-unswitch.c (unswitch_loop): Pass probabilities to loopify.
|
||||
* tree-ssa-loop-unswitch.c (tree_unswitch_loop): Pass probabilities
|
||||
to loop_version.
|
||||
* cfgloopmanip.c (scale_loop_frequencies): Export.
|
||||
(loopify): Scale the frequencies by prescribed coefficients.
|
||||
(set_zero_probability): New function.
|
||||
(duplicate_loop_to_header_edge): Improve updating of frequencies.
|
||||
(lv_adjust_loop_entry_edge, loop_version): Set probabilities
|
||||
and frequencies according to arguments.
|
||||
* tree-ssa-loop-manip.c (tree_unroll_loop): Set probabilities
|
||||
correctly.
|
||||
* cfg.c (scale_bbs_frequencies_int): Allow scaling the frequencies up.
|
||||
* modulo-sched.c (sms_schedule): Set probabilities for entering
|
||||
versioned loop correctly.
|
||||
* tree-vect-transform.c (vect_transform_loop): Ditto.
|
||||
* cfgloop.h (loopify, loop_version): Declaration changed.
|
||||
(scale_loop_frequencies): Declared.
|
||||
|
||||
2007-01-02 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* cgraph.c: Include tree-flow.h
|
||||
@ -41,12 +61,12 @@
|
||||
|
||||
2007-01-02 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* tree-mudflap.c (mf_decl_cache_locals, mf_build_check_statement_for):
|
||||
Do not add referenced vars.
|
||||
* tree-cfg.c (update_modified_stmts): Do not update when SSA operands
|
||||
are not active.
|
||||
* passes.c (init_optimization_passes): Put mudflap_2 after
|
||||
free_datastructures.
|
||||
* tree-mudflap.c (mf_decl_cache_locals, mf_build_check_statement_for):
|
||||
Do not add referenced vars.
|
||||
* tree-cfg.c (update_modified_stmts): Do not update when SSA operands
|
||||
are not active.
|
||||
* passes.c (init_optimization_passes): Put mudflap_2 after
|
||||
free_datastructures.
|
||||
|
||||
2007-01-02 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
|
21
gcc/cfg.c
21
gcc/cfg.c
@ -949,15 +949,28 @@ scale_bbs_frequencies_int (basic_block *bbs, int nbbs, int num, int den)
|
||||
edge e;
|
||||
if (num < 0)
|
||||
num = 0;
|
||||
if (num > den)
|
||||
|
||||
/* Scale NUM and DEN to avoid overflows. Frequencies are in order of
|
||||
10^4, if we make DEN <= 10^3, we can afford to upscale by 100
|
||||
and still safely fit in int during calculations. */
|
||||
if (den > 1000)
|
||||
{
|
||||
if (num > 1000000)
|
||||
return;
|
||||
|
||||
num = RDIV (1000 * num, den);
|
||||
den = 1000;
|
||||
}
|
||||
if (num > 100 * den)
|
||||
return;
|
||||
/* Assume that the users are producing the fraction from frequencies
|
||||
that never grow far enough to risk arithmetic overflow. */
|
||||
gcc_assert (num < 65536);
|
||||
|
||||
for (i = 0; i < nbbs; i++)
|
||||
{
|
||||
edge_iterator ei;
|
||||
bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den);
|
||||
/* Make sure the frequencies do not grow over BB_FREQ_MAX. */
|
||||
if (bbs[i]->frequency > BB_FREQ_MAX)
|
||||
bbs[i]->frequency = BB_FREQ_MAX;
|
||||
bbs[i]->count = RDIV (bbs[i]->count * num, den);
|
||||
FOR_EACH_EDGE (e, ei, bbs[i]->succs)
|
||||
e->count = RDIV (e->count * num, den);
|
||||
|
@ -254,10 +254,12 @@ extern bool duplicate_loop_to_header_edge (struct loop *, edge,
|
||||
unsigned, sbitmap, edge,
|
||||
VEC (edge, heap) **, int);
|
||||
extern struct loop *loopify (edge, edge,
|
||||
basic_block, edge, edge, bool);
|
||||
basic_block, edge, edge, bool,
|
||||
unsigned, unsigned);
|
||||
struct loop * loop_version (struct loop *, void *,
|
||||
basic_block *, bool);
|
||||
basic_block *, unsigned, unsigned, unsigned, bool);
|
||||
extern bool remove_path (edge);
|
||||
void scale_loop_frequencies (struct loop *, int, int);
|
||||
|
||||
/* Induction variable analysis. */
|
||||
|
||||
|
@ -44,7 +44,6 @@ static void fix_loop_placements (struct loop *, bool *);
|
||||
static bool fix_bb_placement (basic_block);
|
||||
static void fix_bb_placements (basic_block, bool *);
|
||||
static void place_new_loop (struct loop *);
|
||||
static void scale_loop_frequencies (struct loop *, int, int);
|
||||
static basic_block create_preheader (struct loop *, int);
|
||||
static void unloop (struct loop *, bool *);
|
||||
|
||||
@ -396,7 +395,7 @@ add_loop (struct loop *loop, struct loop *outer)
|
||||
}
|
||||
|
||||
/* Multiply all frequencies in LOOP by NUM/DEN. */
|
||||
static void
|
||||
void
|
||||
scale_loop_frequencies (struct loop *loop, int num, int den)
|
||||
{
|
||||
basic_block *bbs;
|
||||
@ -413,12 +412,13 @@ scale_loop_frequencies (struct loop *loop, int num, int den)
|
||||
LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
|
||||
FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
|
||||
TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
|
||||
Returns newly created loop. */
|
||||
Returns the newly created loop. Frequencies and counts in the new loop
|
||||
are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */
|
||||
|
||||
struct loop *
|
||||
loopify (edge latch_edge, edge header_edge,
|
||||
basic_block switch_bb, edge true_edge, edge false_edge,
|
||||
bool redirect_all_edges)
|
||||
bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
|
||||
{
|
||||
basic_block succ_bb = latch_edge->dest;
|
||||
basic_block pred_bb = header_edge->src;
|
||||
@ -427,7 +427,7 @@ loopify (edge latch_edge, edge header_edge,
|
||||
sbitmap seen;
|
||||
struct loop *loop = XCNEW (struct loop);
|
||||
struct loop *outer = succ_bb->loop_father->outer;
|
||||
int freq, prob, tot_prob;
|
||||
int freq;
|
||||
gcov_type cnt;
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
@ -437,10 +437,6 @@ loopify (edge latch_edge, edge header_edge,
|
||||
|
||||
freq = EDGE_FREQUENCY (header_edge);
|
||||
cnt = header_edge->count;
|
||||
prob = EDGE_SUCC (switch_bb, 0)->probability;
|
||||
tot_prob = prob + EDGE_SUCC (switch_bb, 1)->probability;
|
||||
if (tot_prob == 0)
|
||||
tot_prob = 1;
|
||||
|
||||
/* Redirect edges. */
|
||||
loop_redirect_edge (latch_edge, loop->header);
|
||||
@ -469,12 +465,17 @@ loopify (edge latch_edge, edge header_edge,
|
||||
add_bb_to_loop (switch_bb, outer);
|
||||
|
||||
/* Fix frequencies. */
|
||||
switch_bb->frequency = freq;
|
||||
switch_bb->count = cnt;
|
||||
FOR_EACH_EDGE (e, ei, switch_bb->succs)
|
||||
e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE;
|
||||
scale_loop_frequencies (loop, prob, tot_prob);
|
||||
scale_loop_frequencies (succ_bb->loop_father, tot_prob - prob, tot_prob);
|
||||
if (redirect_all_edges)
|
||||
{
|
||||
switch_bb->frequency = freq;
|
||||
switch_bb->count = cnt;
|
||||
FOR_EACH_EDGE (e, ei, switch_bb->succs)
|
||||
{
|
||||
e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE;
|
||||
}
|
||||
}
|
||||
scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
|
||||
scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
|
||||
|
||||
/* Update dominators of blocks outside of LOOP. */
|
||||
dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
|
||||
@ -804,6 +805,41 @@ update_single_exit_for_duplicated_loops (struct loop *orig_loops[], unsigned n)
|
||||
update_single_exit_for_duplicated_loop (orig_loops[i]);
|
||||
}
|
||||
|
||||
/* Sets probability and count of edge E to zero. The probability and count
|
||||
is redistributed evenly to the remaining edges coming from E->src. */
|
||||
|
||||
static void
|
||||
set_zero_probability (edge e)
|
||||
{
|
||||
basic_block bb = e->src;
|
||||
edge_iterator ei;
|
||||
edge ae, last = NULL;
|
||||
unsigned n = EDGE_COUNT (bb->succs);
|
||||
gcov_type cnt = e->count, cnt1;
|
||||
unsigned prob = e->probability, prob1;
|
||||
|
||||
gcc_assert (n > 1);
|
||||
cnt1 = cnt / (n - 1);
|
||||
prob1 = prob / (n - 1);
|
||||
|
||||
FOR_EACH_EDGE (ae, ei, bb->succs)
|
||||
{
|
||||
if (ae == e)
|
||||
continue;
|
||||
|
||||
ae->probability += prob1;
|
||||
ae->count += cnt1;
|
||||
last = ae;
|
||||
}
|
||||
|
||||
/* Move the rest to one of the edges. */
|
||||
last->probability += prob % (n - 1);
|
||||
last->count += cnt % (n - 1);
|
||||
|
||||
e->probability = 0;
|
||||
e->count = 0;
|
||||
}
|
||||
|
||||
/* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
|
||||
loop structure and dominators. E's destination must be LOOP header for
|
||||
this to work, i.e. it must be entry or latch edge of this loop; these are
|
||||
@ -834,10 +870,13 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
|
||||
unsigned i, j, n;
|
||||
int is_latch = (latch == e->src);
|
||||
int scale_act = 0, *scale_step = NULL, scale_main = 0;
|
||||
int scale_after_exit = 0;
|
||||
int p, freq_in, freq_le, freq_out_orig;
|
||||
int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
|
||||
int add_irreducible_flag;
|
||||
basic_block place_after;
|
||||
bitmap bbs_to_scale = NULL;
|
||||
bitmap_iterator bi;
|
||||
|
||||
gcc_assert (e->dest == loop->header);
|
||||
gcc_assert (ndupl > 0);
|
||||
@ -887,10 +926,26 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
|
||||
prob_pass_wont_exit =
|
||||
RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
|
||||
|
||||
if (orig
|
||||
&& REG_BR_PROB_BASE - orig->probability != 0)
|
||||
{
|
||||
/* The blocks that are dominated by a removed exit edge ORIG have
|
||||
frequencies scaled by this. */
|
||||
scale_after_exit = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE,
|
||||
REG_BR_PROB_BASE - orig->probability);
|
||||
bbs_to_scale = BITMAP_ALLOC (NULL);
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (bbs[i] != orig->src
|
||||
&& dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
|
||||
bitmap_set_bit (bbs_to_scale, i);
|
||||
}
|
||||
}
|
||||
|
||||
scale_step = XNEWVEC (int, ndupl);
|
||||
|
||||
for (i = 1; i <= ndupl; i++)
|
||||
scale_step[i - 1] = TEST_BIT (wont_exit, i)
|
||||
for (i = 1; i <= ndupl; i++)
|
||||
scale_step[i - 1] = TEST_BIT (wont_exit, i)
|
||||
? prob_pass_wont_exit
|
||||
: prob_pass_thru;
|
||||
|
||||
@ -1043,6 +1098,17 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
|
||||
{
|
||||
if (to_remove)
|
||||
VEC_safe_push (edge, heap, *to_remove, new_spec_edges[SE_ORIG]);
|
||||
set_zero_probability (new_spec_edges[SE_ORIG]);
|
||||
|
||||
/* Scale the frequencies of the blocks dominated by the exit. */
|
||||
if (bbs_to_scale)
|
||||
{
|
||||
EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
|
||||
{
|
||||
scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
|
||||
REG_BR_PROB_BASE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Record the first copy in the control flow order if it is not
|
||||
@ -1068,6 +1134,17 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
|
||||
{
|
||||
if (to_remove)
|
||||
VEC_safe_push (edge, heap, *to_remove, orig);
|
||||
set_zero_probability (orig);
|
||||
|
||||
/* Scale the frequencies of the blocks dominated by the exit. */
|
||||
if (bbs_to_scale)
|
||||
{
|
||||
EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
|
||||
{
|
||||
scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
|
||||
REG_BR_PROB_BASE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Update the original loop. */
|
||||
@ -1103,6 +1180,7 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
|
||||
free (first_active);
|
||||
|
||||
free (bbs);
|
||||
BITMAP_FREE (bbs_to_scale);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1239,13 +1317,12 @@ force_single_succ_latches (void)
|
||||
--- edge e ---> [cond expr] ---> [first_head]
|
||||
|
|
||||
+---------> [second_head]
|
||||
*/
|
||||
|
||||
THEN_PROB is the probability of then branch of the condition. */
|
||||
|
||||
static basic_block
|
||||
lv_adjust_loop_entry_edge (basic_block first_head,
|
||||
basic_block second_head,
|
||||
edge e,
|
||||
void *cond_expr)
|
||||
lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
|
||||
edge e, void *cond_expr, unsigned then_prob)
|
||||
{
|
||||
basic_block new_head = NULL;
|
||||
edge e1;
|
||||
@ -1256,13 +1333,18 @@ lv_adjust_loop_entry_edge (basic_block first_head,
|
||||
insert conditional expr. */
|
||||
new_head = split_edge (e);
|
||||
|
||||
|
||||
lv_add_condition_to_bb (first_head, second_head, new_head,
|
||||
cond_expr);
|
||||
|
||||
/* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
|
||||
e = single_succ_edge (new_head);
|
||||
e1 = make_edge (new_head, first_head,
|
||||
current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
|
||||
e1->probability = then_prob;
|
||||
e->probability = REG_BR_PROB_BASE - then_prob;
|
||||
e1->count = RDIV (e->count * e1->probability, REG_BR_PROB_BASE);
|
||||
e->count = RDIV (e->count * e->probability, REG_BR_PROB_BASE);
|
||||
|
||||
set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
|
||||
set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
|
||||
|
||||
@ -1281,12 +1363,18 @@ lv_adjust_loop_entry_edge (basic_block first_head,
|
||||
may be a run time test for things that were not resolved by static
|
||||
analysis (overlapping ranges (anti-aliasing), alignment, etc.).
|
||||
|
||||
THEN_PROB is the probability of the then edge of the if. THEN_SCALE
|
||||
is the ratio by that the frequencies in the original loop should
|
||||
be scaled. ELSE_SCALE is the ratio by that the frequencies in the
|
||||
new loop should be scaled.
|
||||
|
||||
If PLACE_AFTER is true, we place the new loop after LOOP in the
|
||||
instruction stream, otherwise it is placed before LOOP. */
|
||||
|
||||
struct loop *
|
||||
loop_version (struct loop *loop,
|
||||
void *cond_expr, basic_block *condition_bb,
|
||||
unsigned then_prob, unsigned then_scale, unsigned else_scale,
|
||||
bool place_after)
|
||||
{
|
||||
basic_block first_head, second_head;
|
||||
@ -1318,7 +1406,7 @@ loop_version (struct loop *loop,
|
||||
|
||||
/* Split loop entry edge and insert new block with cond expr. */
|
||||
cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
|
||||
entry, cond_expr);
|
||||
entry, cond_expr, then_prob);
|
||||
if (condition_bb)
|
||||
*condition_bb = cond_bb;
|
||||
|
||||
@ -1334,7 +1422,8 @@ loop_version (struct loop *loop,
|
||||
nloop = loopify (latch_edge,
|
||||
single_pred_edge (get_bb_copy (loop->header)),
|
||||
cond_bb, true_edge, false_edge,
|
||||
false /* Do not redirect all edges. */);
|
||||
false /* Do not redirect all edges. */,
|
||||
then_scale, else_scale);
|
||||
|
||||
exit = single_exit (loop);
|
||||
if (exit)
|
||||
|
@ -451,7 +451,8 @@ unswitch_loop (struct loop *loop, basic_block unswitch_on, rtx cond, rtx cinsn)
|
||||
/* Loopify from the copy of LOOP body, constructing the new loop. */
|
||||
nloop = loopify (latch_edge,
|
||||
single_pred_edge (get_bb_copy (loop->header)), switch_bb,
|
||||
BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true);
|
||||
BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true,
|
||||
prob, REG_BR_PROB_BASE - prob);
|
||||
|
||||
/* Remove branches that are now unreachable in new loops. */
|
||||
remove_path (true_edge);
|
||||
|
@ -868,6 +868,10 @@ canon_loop (struct loop *loop)
|
||||
}
|
||||
}
|
||||
|
||||
/* Probability in % that the sms-ed loop rolls enough so that optimized
|
||||
version may be entered. Just a guess. */
|
||||
#define PROB_SMS_ENOUGH_ITERATIONS 80
|
||||
|
||||
/* Main entry point, perform SMS scheduling on the loops of the function
|
||||
that consist of single basic blocks. */
|
||||
static void
|
||||
@ -882,7 +886,7 @@ sms_schedule (void)
|
||||
partial_schedule_ptr ps;
|
||||
struct df *df;
|
||||
basic_block bb = NULL;
|
||||
struct loop *loop, *nloop;
|
||||
struct loop *loop;
|
||||
basic_block condition_bb = NULL;
|
||||
edge latch_edge;
|
||||
gcov_type trip_count = 0;
|
||||
@ -1181,8 +1185,12 @@ sms_schedule (void)
|
||||
{
|
||||
rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
|
||||
GEN_INT(stage_count));
|
||||
unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
|
||||
* REG_BR_PROB_BASE) / 100;
|
||||
|
||||
nloop = loop_version (loop, comp_rtx, &condition_bb, true);
|
||||
loop_version (loop, comp_rtx, &condition_bb,
|
||||
prob, prob, REG_BR_PROB_BASE - prob,
|
||||
true);
|
||||
}
|
||||
|
||||
/* Set new iteration count of loop kernel. */
|
||||
|
@ -1,3 +1,8 @@
|
||||
2006-01-03 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
* gcc.dg/tree-ssa/update-unroll-1.c: New test.
|
||||
* gcc.dg/tree-ssa/update-unswitch-1.c: New test.
|
||||
|
||||
2007-01-02 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* gcc.dg/pr16194.c: We now output error on all three functions, not just
|
||||
|
22
gcc/testsuite/gcc.dg/tree-ssa/update-unroll-1.c
Normal file
22
gcc/testsuite/gcc.dg/tree-ssa/update-unroll-1.c
Normal file
@ -0,0 +1,22 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target ilp32 } */
|
||||
/* { dg-options "-O1 -fprefetch-loop-arrays -march=athlon -fdump-tree-aprefetch-blocks" } */
|
||||
|
||||
int a[10000];
|
||||
|
||||
int foo(unsigned n)
|
||||
{
|
||||
unsigned i, s = 0;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
s += a[i];
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/* We used to make the probability that the body of the loop (unrolled
|
||||
to enable prefetching) is entered 0, which is not correct. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "Invalid sum" "aprefetch"} } */
|
||||
/* { dg-final { scan-tree-dump-not "SUCC: 7 .100.0%" "aprefetch"} } */
|
||||
/* { dg-final { cleanup-tree-dump "aprefetch" } } */
|
24
gcc/testsuite/gcc.dg/tree-ssa/update-unswitch-1.c
Normal file
24
gcc/testsuite/gcc.dg/tree-ssa/update-unswitch-1.c
Normal file
@ -0,0 +1,24 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -funswitch-loops -fdump-tree-unswitch-blocks" } */
|
||||
|
||||
int bla(int p)
|
||||
{
|
||||
unsigned i, s = 1;
|
||||
|
||||
for (i = 4; i < 100; i++)
|
||||
{
|
||||
if (p)
|
||||
s += i/2;
|
||||
else
|
||||
s *= i/2;
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/* We used to make the probability that the first of the loops created
|
||||
by unswitching is entered 100%, which is not correct. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "Invalid sum" "unswitch"} } */
|
||||
/* { dg-final { scan-tree-dump-not "SUCC: 3 .100.0%" "unswitch"} } */
|
||||
/* { dg-final { cleanup-tree-dump "unswitch" } } */
|
@ -805,6 +805,9 @@ determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc,
|
||||
post;
|
||||
} */
|
||||
|
||||
/* Probability in % that the unrolled loop is entered. Just a guess. */
|
||||
#define PROB_UNROLLED_LOOP_ENTERED 90
|
||||
|
||||
void
|
||||
tree_unroll_loop (struct loop *loop, unsigned factor,
|
||||
edge exit, struct tree_niter_desc *desc)
|
||||
@ -816,11 +819,12 @@ tree_unroll_loop (struct loop *loop, unsigned factor,
|
||||
struct loop *new_loop;
|
||||
basic_block rest, exit_bb;
|
||||
edge old_entry, new_entry, old_latch, precond_edge, new_exit;
|
||||
edge nonexit, new_nonexit;
|
||||
edge new_nonexit;
|
||||
block_stmt_iterator bsi;
|
||||
use_operand_p op;
|
||||
bool ok;
|
||||
unsigned est_niter;
|
||||
unsigned est_niter, prob_entry, scale_unrolled, scale_rest, freq_e, freq_h;
|
||||
unsigned new_est_niter;
|
||||
unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
|
||||
sbitmap wont_exit;
|
||||
|
||||
@ -829,7 +833,29 @@ tree_unroll_loop (struct loop *loop, unsigned factor,
|
||||
&enter_main_cond, &exit_base, &exit_step,
|
||||
&exit_cmp, &exit_bound);
|
||||
|
||||
new_loop = loop_version (loop, enter_main_cond, NULL, true);
|
||||
/* Let us assume that the unrolled loop is quite likely to be entered. */
|
||||
if (integer_nonzerop (enter_main_cond))
|
||||
prob_entry = REG_BR_PROB_BASE;
|
||||
else
|
||||
prob_entry = PROB_UNROLLED_LOOP_ENTERED * REG_BR_PROB_BASE / 100;
|
||||
|
||||
/* The values for scales should keep profile consistent, and somewhat close
|
||||
to correct.
|
||||
|
||||
TODO: The current value of SCALE_REST makes it appear that the loop that
|
||||
is created by splitting the remaining iterations of the unrolled loop is
|
||||
executed the same number of times as the original loop, and with the same
|
||||
frequencies, which is obviously wrong. This does not appear to cause
|
||||
problems, so we do not bother with fixing it for now. To make the profile
|
||||
correct, we would need to change the probability of the exit edge of the
|
||||
loop, and recompute the distribution of frequencies in its body because
|
||||
of this change (scale the frequencies of blocks before and after the exit
|
||||
by appropriate factors). */
|
||||
scale_unrolled = prob_entry;
|
||||
scale_rest = REG_BR_PROB_BASE;
|
||||
|
||||
new_loop = loop_version (loop, enter_main_cond, NULL,
|
||||
prob_entry, scale_unrolled, scale_rest, true);
|
||||
gcc_assert (new_loop != NULL);
|
||||
update_ssa (TODO_update_ssa);
|
||||
|
||||
@ -837,14 +863,6 @@ tree_unroll_loop (struct loop *loop, unsigned factor,
|
||||
dont_exit = ((exit->flags & EDGE_TRUE_VALUE)
|
||||
? boolean_false_node
|
||||
: boolean_true_node);
|
||||
if (exit == EDGE_SUCC (exit->src, 0))
|
||||
nonexit = EDGE_SUCC (exit->src, 1);
|
||||
else
|
||||
nonexit = EDGE_SUCC (exit->src, 0);
|
||||
nonexit->probability = REG_BR_PROB_BASE;
|
||||
exit->probability = 0;
|
||||
nonexit->count += exit->count;
|
||||
exit->count = 0;
|
||||
exit_if = last_stmt (exit->src);
|
||||
COND_EXPR_COND (exit_if) = dont_exit;
|
||||
update_stmt (exit_if);
|
||||
@ -858,6 +876,29 @@ tree_unroll_loop (struct loop *loop, unsigned factor,
|
||||
gcc_assert (ok);
|
||||
update_ssa (TODO_update_ssa);
|
||||
|
||||
/* Determine the probability of the exit edge. */
|
||||
new_est_niter = est_niter / factor;
|
||||
|
||||
/* Without profile feedback, loops for that we do not know a better estimate
|
||||
are assumed to roll 10 times. When we unroll such loop, it appears to
|
||||
roll too little, and it may even seem to be cold. To avoid this, we
|
||||
ensure that the created loop appears to roll at least 5 times (but at
|
||||
most as many times as before unrolling). */
|
||||
if (new_est_niter < 5)
|
||||
{
|
||||
if (est_niter < 5)
|
||||
new_est_niter = est_niter;
|
||||
else
|
||||
new_est_niter = 5;
|
||||
}
|
||||
|
||||
/* Ensure that the frequencies in the loop match the new estimated
|
||||
number of iterations. */
|
||||
freq_h = loop->header->frequency;
|
||||
freq_e = EDGE_FREQUENCY (loop_preheader_edge (loop));
|
||||
if (freq_h != 0)
|
||||
scale_loop_frequencies (loop, freq_e * new_est_niter, freq_h);
|
||||
|
||||
/* Prepare the cfg and update the phi nodes. */
|
||||
rest = loop_preheader_edge (new_loop)->src;
|
||||
precond_edge = single_pred_edge (rest);
|
||||
@ -866,12 +907,16 @@ tree_unroll_loop (struct loop *loop, unsigned factor,
|
||||
|
||||
new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
|
||||
new_exit->count = loop_preheader_edge (loop)->count;
|
||||
est_niter = est_niter / factor + 1;
|
||||
new_exit->probability = REG_BR_PROB_BASE / est_niter;
|
||||
new_exit->probability = REG_BR_PROB_BASE / new_est_niter;
|
||||
|
||||
rest->count += new_exit->count;
|
||||
rest->frequency += EDGE_FREQUENCY (new_exit);
|
||||
|
||||
new_nonexit = single_pred_edge (loop->latch);
|
||||
new_nonexit->flags = EDGE_TRUE_VALUE;
|
||||
new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
|
||||
scale_bbs_frequencies_int (&loop->latch, 1, new_nonexit->probability,
|
||||
REG_BR_PROB_BASE);
|
||||
|
||||
old_entry = loop_preheader_edge (loop);
|
||||
new_entry = loop_preheader_edge (new_loop);
|
||||
|
@ -269,13 +269,17 @@ static struct loop *
|
||||
tree_unswitch_loop (struct loop *loop,
|
||||
basic_block unswitch_on, tree cond)
|
||||
{
|
||||
basic_block condition_bb;
|
||||
unsigned prob_true;
|
||||
edge edge_true, edge_false;
|
||||
|
||||
/* Some sanity checking. */
|
||||
gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on));
|
||||
gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2);
|
||||
gcc_assert (loop->inner == NULL);
|
||||
|
||||
extract_true_false_edges_from_block (unswitch_on, &edge_true, &edge_false);
|
||||
prob_true = edge_true->probability;
|
||||
return loop_version (loop, unshare_expr (cond),
|
||||
&condition_bb, false);
|
||||
NULL, prob_true, prob_true,
|
||||
REG_BR_PROB_BASE - prob_true, false);
|
||||
}
|
||||
|
@ -4699,11 +4699,13 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
||||
basic_block new_exit_bb;
|
||||
edge new_exit_e, e;
|
||||
tree orig_phi, new_phi, arg;
|
||||
unsigned prob = 4 * REG_BR_PROB_BASE / 5;
|
||||
|
||||
cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
|
||||
&cond_expr_stmt_list);
|
||||
initialize_original_copy_tables ();
|
||||
nloop = loop_version (loop, cond_expr, &condition_bb, true);
|
||||
nloop = loop_version (loop, cond_expr, &condition_bb,
|
||||
prob, prob, REG_BR_PROB_BASE - prob, true);
|
||||
free_original_copy_tables();
|
||||
|
||||
/** Loop versioning violates an assumption we try to maintain during
|
||||
|
Loading…
x
Reference in New Issue
Block a user