From e78410bf1198e3ce4a37678c991821359b0fde79 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Fri, 28 Sep 2012 23:14:23 +0200 Subject: [PATCH] basic-block.h (RDIV): Define. * basic-block.h (RDIV): Define. (EDGE_FREQUENCY): Simplify. (check_probability, combine_probabilities, apply_probability, inverse_probability): New. * cfgloop.c (scale_loop_profile): New function. * cfgloop.h (scale_loop_profile): Declare. (slpeel_add_loop_guard): Add probability parameter. (set_prologue_iterations): Add probability parameter. (slpeel_tree_peel_loop_to_edge): Add bound1 and bound2 parameters; update probabilities correctly. (vect_do_peeling_for_alignment, vect_gen_niters_for_prolog_loop): New. From-SVN: r191839 --- gcc/ChangeLog | 14 +++++ gcc/basic-block.h | 43 ++++++++++++-- gcc/cfgloop.c | 118 +++++++++++++++++++++++++++++++++++++ gcc/cfgloop.h | 1 + gcc/tree-vect-loop-manip.c | 73 +++++++++++++++++++---- 5 files changed, 235 insertions(+), 14 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7851337f434d..78c31bca6f40 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2012-09-28 Jan Hubicka + + * basic-block.h (RDIV): Define. + (EDGE_FREQUENCY): Simplify. + (check_probability, combine_probabilities, apply_probability, + inverse_probability): New. + * cfgloop.c (scale_loop_profile): New function. + * cfgloop.h (scale_loop_profile): Declare. + (slpeel_add_loop_guard): Add probability parameter. + (set_prologue_iterations): Add probability parameter. + (slpeel_tree_peel_loop_to_edge): Add bound1 and bound2 parameters; + update probabilities correctly. + (vect_do_peeling_for_alignment, vect_gen_niters_for_prolog_loop): New. + 2012-09-20 Bernd Schmidt PR bootstrap/54688 diff --git a/gcc/basic-block.h b/gcc/basic-block.h index 32dd17736647..a5491b030119 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -478,11 +478,10 @@ struct edge_list #define BRANCH_EDGE(bb) (EDGE_SUCC ((bb), 0)->flags & EDGE_FALLTHRU \ ? EDGE_SUCC ((bb), 1) : EDGE_SUCC ((bb), 0)) +#define RDIV(X,Y) (((X) + (Y) / 2) / (Y)) /* Return expected execution frequency of the edge E. */ -#define EDGE_FREQUENCY(e) (((e)->src->frequency \ - * (e)->probability \ - + REG_BR_PROB_BASE / 2) \ - / REG_BR_PROB_BASE) +#define EDGE_FREQUENCY(e) RDIV ((e)->src->frequency * (e)->probability, \ + REG_BR_PROB_BASE) /* Return nonzero if edge is critical. */ #define EDGE_CRITICAL_P(e) (EDGE_COUNT ((e)->src->succs) >= 2 \ @@ -910,4 +909,40 @@ extern void default_rtl_profile (void); /* In profile.c. */ extern gcov_working_set_t *find_working_set(unsigned pct_times_10); +/* Check tha probability is sane. */ + +static inline void +check_probability (int prob) +{ + gcc_checking_assert (prob >= 0 && prob <= REG_BR_PROB_BASE); +} + +/* Given PROB1 and PROB2, return PROB1*PROB2/REG_BR_PROB_BASE. + Used to combine BB probabilities. */ + +static inline int +combine_probabilities (int prob1, int prob2) +{ + check_probability (prob1); + check_probability (prob2); + return RDIV (prob1 * prob2, REG_BR_PROB_BASE); +} + +/* Apply probability PROB on frequency or count FREQ. */ + +static inline gcov_type +apply_probability (gcov_type freq, int prob) +{ + check_probability (prob); + return RDIV (freq * prob, REG_BR_PROB_BASE); +} + +/* Return inverse probability for PROB. */ + +static inline int +inverse_probability (int prob1) +{ + check_probability (prob1); + return REG_BR_PROB_BASE - prob1; +} #endif /* GCC_BASIC_BLOCK_H */ diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c index 44df99b8f330..62d3d50ab690 100644 --- a/gcc/cfgloop.c +++ b/gcc/cfgloop.c @@ -1666,3 +1666,121 @@ loop_exits_from_bb_p (struct loop *loop, basic_block bb) return false; } + +/* Scale the profile estiamte within loop by SCALE. + If ITERATION_BOUND is non-zero, scale even further if loop is predicted + to iterate too many times. */ +void +scale_loop_profile (struct loop *loop, int scale, int iteration_bound) +{ + gcov_type iterations = expected_loop_iterations_unbounded (loop); + basic_block *bbs; + unsigned int i; + edge e; + edge_iterator ei; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; Scaling loop %i with scale %f, " + "bounding iterations to %i from guessed %i\n", + loop->num, (double)scale / REG_BR_PROB_BASE, + iteration_bound, (int)iterations); + + /* See if loop is predicted to iterate too many times. */ + if (iteration_bound && iterations > 0 + && RDIV (iterations * scale, REG_BR_PROB_BASE) > iteration_bound) + { + /* Fixing loop profile for different trip count is not trivial; the exit + probabilities has to be updated to match and frequencies propagated down + to the loop body. + + We fully update only the simple case of loop with single exit that is + either from the latch or BB just before latch and leads from BB with + simple conditional jump. This is OK for use in vectorizer. */ + e = single_exit (loop); + if (e) + { + edge other_e; + int freq_delta; + gcov_type count_delta; + + FOR_EACH_EDGE (other_e, ei, e->src->succs) + if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE)) + && e != other_e) + break; + + /* Probability of exit must be 1/iterations. */ + freq_delta = EDGE_FREQUENCY (e); + e->probability = REG_BR_PROB_BASE / iteration_bound; + other_e->probability = inverse_probability (e->probability); + freq_delta -= EDGE_FREQUENCY (e); + + /* Adjust counts accordingly. */ + count_delta = e->count; + e->count = apply_probability (e->src->count, e->probability); + other_e->count = apply_probability (e->src->count, other_e->probability); + count_delta -= e->count; + + /* If latch exists, change its frequency and count, since we changed + probability of exit. Theoretically we should update everything from + source of exit edge to latch, but for vectorizer this is enough. */ + if (loop->latch + && loop->latch != e->src) + { + loop->latch->frequency += freq_delta; + if (loop->latch->frequency < 0) + loop->latch->frequency = 0; + loop->latch->count += count_delta; + if (loop->latch->count < 0) + loop->latch->count = 0; + } + } + + /* Roughly speaking we want to reduce the loop body profile by the + the difference of loop iterations. We however can do better if + we look at the actual profile, if it is available. */ + scale = RDIV (iteration_bound * scale, iterations); + if (loop->header->count) + { + gcov_type count_in = 0; + + FOR_EACH_EDGE (e, ei, loop->header->preds) + if (e->src != loop->latch) + count_in += e->count; + + if (count_in != 0) + scale = RDIV (count_in * iteration_bound * REG_BR_PROB_BASE, loop->header->count); + } + else if (loop->header->frequency) + { + int freq_in = 0; + + FOR_EACH_EDGE (e, ei, loop->header->preds) + if (e->src != loop->latch) + freq_in += EDGE_FREQUENCY (e); + + if (freq_in != 0) + scale = RDIV (freq_in * iteration_bound * REG_BR_PROB_BASE, loop->header->frequency); + } + if (!scale) + scale = 1; + } + + if (scale == REG_BR_PROB_BASE) + return; + + /* Scale the actual probabilities. */ + bbs = get_loop_body (loop); + for (i = 0; i < loop->num_nodes; i++) + { + basic_block bb = bbs[i]; + + bb->count = RDIV (bb->count * scale, REG_BR_PROB_BASE); + bb->frequency = RDIV (bb->frequency * scale, REG_BR_PROB_BASE); + FOR_EACH_EDGE (e, ei, bb->succs) + e->count = RDIV (e->count * scale, REG_BR_PROB_BASE); + } + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; guessed iterations are now %i\n", + (int)expected_loop_iterations_unbounded (loop)); + free (bbs); +} diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index ed6ef3d5c87e..e0a370f32bff 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -713,5 +713,6 @@ extern void unroll_and_peel_loops (int); extern void doloop_optimize_loops (void); extern void move_loop_invariants (void); extern bool finite_loop_p (struct loop *); +extern void scale_loop_profile (struct loop *loop, int scale, int iteration_bound); #endif /* GCC_CFGLOOP_H */ diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 508dff0f714a..f480953440fc 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -931,7 +931,8 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e) static edge slpeel_add_loop_guard (basic_block guard_bb, tree cond, gimple_seq cond_expr_stmt_list, - basic_block exit_bb, basic_block dom_bb) + basic_block exit_bb, basic_block dom_bb, + int probability) { gimple_stmt_iterator gsi; edge new_e, enter_e; @@ -956,6 +957,12 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond, /* Add new edge to connect guard block to the merge/loop-exit block. */ new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); + + new_e->count = guard_bb->count; + new_e->probability = probability; + new_e->count = apply_probability (enter_e->count, probability); + enter_e->count -= new_e->count; + enter_e->probability = inverse_probability (probability); set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb); return new_e; } @@ -1038,7 +1045,8 @@ static void set_prologue_iterations (basic_block bb_before_first_loop, tree *first_niters, struct loop *loop, - unsigned int th) + unsigned int th, + int probability) { edge e; basic_block cond_bb, then_bb; @@ -1067,7 +1075,15 @@ set_prologue_iterations (basic_block bb_before_first_loop, e_true->flags &= ~EDGE_FALLTHRU; e_true->flags |= EDGE_TRUE_VALUE; + e_true->probability = probability; + e_false->probability = inverse_probability (probability); + e_true->count = apply_probability (cond_bb->count, probability); + e_false->count = cond_bb->count - e_true->count; + then_bb->frequency = EDGE_FREQUENCY (e_true); + then_bb->count = e_true->count; + e_fallthru = EDGE_SUCC (then_bb, 0); + e_fallthru->count = then_bb->count; gsi = gsi_last_bb (cond_bb); cost_pre_condition = @@ -1126,6 +1142,8 @@ set_prologue_iterations (basic_block bb_before_first_loop, prologue generation or whether cost model check has not occurred during prologue generation and hence needs to occur during epilogue generation. + - BOUND1 is the upper bound on number of iterations of the first loop (if known) + - BOUND2 is the upper bound on number of iterations of the second loop (if known) Output: @@ -1153,7 +1171,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, edge e, tree *first_niters, tree niters, bool update_first_loop_count, unsigned int th, bool check_profitability, - tree cond_expr, gimple_seq cond_expr_stmt_list) + tree cond_expr, gimple_seq cond_expr_stmt_list, + int bound1, int bound2) { struct loop *new_loop = NULL, *first_loop, *second_loop; edge skip_e; @@ -1166,6 +1185,13 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, edge exit_e = single_exit (loop); LOC loop_loc; tree cost_pre_condition = NULL_TREE; + /* There are many aspects to how likely the first loop is going to be executed. + Without histogram we can't really do good job. Simply set it to + 2/3, so the first loop is not reordered to the end of function and + the hot path through stays short. */ + int first_guard_probability = 2 * REG_BR_PROB_BASE / 3; + int second_guard_probability = 2 * REG_BR_PROB_BASE / 3; + int probability_of_second_loop; if (!slpeel_can_duplicate_loop_p (loop, e)) return NULL; @@ -1341,6 +1367,21 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, bb_before_first_loop = split_edge (loop_preheader_edge (first_loop)); bb_before_second_loop = split_edge (single_exit (first_loop)); + probability_of_second_loop = (inverse_probability (first_guard_probability) + + combine_probabilities (second_guard_probability, + first_guard_probability)); + /* Theoretically preheader edge of first loop and exit edge should have + same frequencies. Loop exit probablities are however easy to get wrong. + It is safer to copy value from original loop entry. */ + bb_before_second_loop->frequency + = apply_probability (bb_before_first_loop->frequency, + probability_of_second_loop); + bb_before_second_loop->count + = apply_probability (bb_before_first_loop->count, + probability_of_second_loop); + single_succ_edge (bb_before_second_loop)->count + = bb_before_second_loop->count; + /* Epilogue peeling. */ if (!update_first_loop_count) { @@ -1374,7 +1415,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, { if (check_profitability) set_prologue_iterations (bb_before_first_loop, first_niters, - loop, th); + loop, th, first_guard_probability); pre_condition = fold_build2 (LE_EXPR, boolean_type_node, *first_niters, @@ -1383,7 +1424,10 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition, cond_expr_stmt_list, - bb_before_second_loop, bb_before_first_loop); + bb_before_second_loop, bb_before_first_loop, + inverse_probability (first_guard_probability)); + scale_loop_profile (first_loop, first_guard_probability, + check_profitability && (int)th > bound1 ? th : bound1); slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop, first_loop == new_loop, &new_exit_bb); @@ -1421,7 +1465,9 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, pre_condition = fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters); skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL, - bb_after_second_loop, bb_before_first_loop); + bb_after_second_loop, bb_before_first_loop, + inverse_probability (second_guard_probability)); + scale_loop_profile (second_loop, probability_of_second_loop, bound2); slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop, second_loop == new_loop, &new_exit_bb); @@ -1882,7 +1928,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop), &ratio_mult_vf_name, ni_name, false, th, check_profitability, - cond_expr, cond_expr_stmt_list); + cond_expr, cond_expr_stmt_list, + 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); gcc_assert (new_loop); gcc_assert (loop_num == loop->num); #ifdef ENABLE_CHECKING @@ -1951,7 +1998,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, use TYPE_VECTOR_SUBPARTS. */ static tree -vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) +vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound) { struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); @@ -1977,6 +2024,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) fprintf (vect_dump, "known peeling = %d.", npeel); iters = build_int_cst (niters_type, npeel); + *bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); } else { @@ -2015,6 +2063,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign); iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1); iters = fold_convert (niters_type, iters); + *bound = nelements; } /* Create: prolog_loop_niters = min (iters, loop_niters) */ @@ -2107,6 +2156,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree wide_prolog_niters; struct loop *new_loop; int max_iter; + int bound = 0; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_do_peeling_for_alignment ==="); @@ -2115,13 +2165,16 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, ni_name = vect_build_loop_niters (loop_vinfo, NULL); niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, - ni_name); + ni_name, + &bound); /* Peel the prolog loop and iterate it niters_of_prolog_loop. */ new_loop = slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop), &niters_of_prolog_loop, ni_name, true, - th, check_profitability, NULL_TREE, NULL); + th, check_profitability, NULL_TREE, NULL, + bound, + 0); gcc_assert (new_loop); #ifdef ENABLE_CHECKING