mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-22 07:00:29 +08:00
Make profile estimation more precise
While analyzing a code size regression in the SPEC2k GCC binary I noticed that we perform some inline decisions because we think that the number of executions is very high. In particular, there was an inline decision inlining gen_rtx_fmt_ee into find_reloads, believing that it is called 4 billion times. This turned out to be an accumulation of roundoff errors in propagate_freq, which was a bit mechanically updated from the original sreals to C++ sreals and later to the new probabilities. This led us to estimate that a loopback edge is reached with probability 2.3, which was capped to 1-1/10000, and since this happened in a nested loop it quickly escalated to large values. Originally, capping to REG_BR_PROB_BASE avoided such problems, but now we have a much higher range. This patch avoids going from probabilities to REG_BR_PROB_BASE, so precision is kept. In addition, it makes the propagation not estimate more loop iterations than the max-predicted-iterations param allows. The first change alone keeps the cap from being triggered on the gcc build, but it is still better to be safe than sorry. * ipa-fnsummary.c (estimate_calls_size_and_time): Fix formatting of dump. * params.opt: (max-predicted-iterations): Set bounds. * predict.c (real_almost_one, real_br_prob_base, real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove. (propagate_freq): Add max_cyclic_prob parameter; cap cyclic probabilities; do not truncate to reg_br_prob_bases. (estimate_loops_at_level): Pass max_cyclic_prob. (estimate_loops): Compute max_cyclic_prob. (estimate_bb_frequencies): Do not initialize real_*; update calculation of back edge prob. * profile-count.c (profile_probability::to_sreal): New. * profile-count.h (class sreal): Move up in file. (profile_probability::to_sreal): Declare.
This commit is contained in:
parent
801f5b9677
commit
f5b25e1516
@ -1,3 +1,20 @@
|
||||
2020-01-16 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
* ipa-fnsummary.c (estimate_calls_size_and_time): Fix formatting of
|
||||
dump.
|
||||
* params.opt: (max-predicted-iterations): Set bounds.
|
||||
* predict.c (real_almost_one, real_br_prob_base,
|
||||
real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove.
|
||||
(propagate_freq): Add max_cyclic_prob parameter; cap cyclic
|
||||
probabilities; do not truncate to reg_br_prob_bases.
|
||||
(estimate_loops_at_level): Pass max_cyclic_prob.
|
||||
(estimate_loops): Compute max_cyclic_prob.
|
||||
(estimate_bb_frequencies): Do not initialize real_*; update calculation
|
||||
of back edge prob.
|
||||
* profile-count.c (profile_probability::to_sreal): New.
|
||||
* profile-count.h (class sreal): Move up in file.
|
||||
(profile_probability::to_sreal): Declare.
|
||||
|
||||
2020-01-16 Stam Markianos-Wright <stam.markianos-wright@arm.com>
|
||||
|
||||
* config/arm/arm.c
|
||||
|
@ -3258,7 +3258,7 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size,
|
||||
gcc_assert (*size == old_size);
|
||||
if (time && (*time - old_time > 1 || *time - old_time < -1)
|
||||
&& dump_file)
|
||||
fprintf (dump_file, "Time mismatch in call summary %f!=%f",
|
||||
fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
|
||||
old_time.to_double (),
|
||||
time->to_double ());
|
||||
}
|
||||
|
@ -555,7 +555,7 @@ Common Joined UInteger Var(param_max_pow_sqrt_depth) Init(5) IntegerRange(1, 32)
|
||||
Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant.
|
||||
|
||||
-param=max-predicted-iterations=
|
||||
Common Joined UInteger Var(param_max_predicted_iterations) Init(100) Param Optimization
|
||||
Common Joined UInteger Var(param_max_predicted_iterations) Init(100) IntegerRange(1, 65536) Param Optimization
|
||||
The maximum number of loop iterations we predict statically.
|
||||
|
||||
-param=max-reload-search-insns=
|
||||
|
@ -76,10 +76,6 @@ enum predictor_reason
|
||||
static const char *reason_messages[] = {"", " (ignored)",
|
||||
" (single edge duplicate)", " (edge pair duplicate)"};
|
||||
|
||||
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
|
||||
1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
|
||||
static sreal real_almost_one, real_br_prob_base,
|
||||
real_inv_br_prob_base, real_one_half, real_bb_freq_max;
|
||||
|
||||
static void combine_predictions_for_insn (rtx_insn *, basic_block);
|
||||
static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
|
||||
@ -3266,7 +3262,8 @@ public:
|
||||
TOVISIT, starting in HEAD. */
|
||||
|
||||
static void
|
||||
propagate_freq (basic_block head, bitmap tovisit)
|
||||
propagate_freq (basic_block head, bitmap tovisit,
|
||||
sreal max_cyclic_prob)
|
||||
{
|
||||
basic_block bb;
|
||||
basic_block last;
|
||||
@ -3322,22 +3319,14 @@ propagate_freq (basic_block head, bitmap tovisit)
|
||||
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
if (EDGE_INFO (e)->back_edge)
|
||||
{
|
||||
cyclic_probability += EDGE_INFO (e)->back_edge_prob;
|
||||
}
|
||||
cyclic_probability += EDGE_INFO (e)->back_edge_prob;
|
||||
else if (!(e->flags & EDGE_DFS_BACK))
|
||||
{
|
||||
/* frequency += (e->probability
|
||||
* BLOCK_INFO (e->src)->frequency /
|
||||
REG_BR_PROB_BASE); */
|
||||
|
||||
/* FIXME: Graphite is producing edges with no profile. Once
|
||||
this is fixed, drop this. */
|
||||
sreal tmp = e->probability.initialized_p () ?
|
||||
e->probability.to_reg_br_prob_base () : 0;
|
||||
tmp *= BLOCK_INFO (e->src)->frequency;
|
||||
tmp *= real_inv_br_prob_base;
|
||||
frequency += tmp;
|
||||
e->probability.to_sreal () : 0;
|
||||
frequency += tmp * BLOCK_INFO (e->src)->frequency;
|
||||
}
|
||||
|
||||
if (cyclic_probability == 0)
|
||||
@ -3346,14 +3335,29 @@ propagate_freq (basic_block head, bitmap tovisit)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cyclic_probability > real_almost_one)
|
||||
cyclic_probability = real_almost_one;
|
||||
if (cyclic_probability > max_cyclic_prob)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
"cyclic probability of bb %i is %f (capped to %f)"
|
||||
"; turning freq %f",
|
||||
bb->index, cyclic_probability.to_double (),
|
||||
max_cyclic_prob.to_double (),
|
||||
frequency.to_double ());
|
||||
|
||||
cyclic_probability = max_cyclic_prob;
|
||||
}
|
||||
else if (dump_file)
|
||||
fprintf (dump_file,
|
||||
"cyclic probability of bb %i is %f; turning freq %f",
|
||||
bb->index, cyclic_probability.to_double (),
|
||||
frequency.to_double ());
|
||||
|
||||
/* BLOCK_INFO (bb)->frequency = frequency
|
||||
/ (1 - cyclic_probability) */
|
||||
|
||||
cyclic_probability = sreal (1) - cyclic_probability;
|
||||
BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
|
||||
BLOCK_INFO (bb)->frequency = frequency
|
||||
/ (sreal (1) - cyclic_probability);
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " to %f\n",
|
||||
BLOCK_INFO (bb)->frequency.to_double ());
|
||||
}
|
||||
}
|
||||
|
||||
@ -3362,16 +3366,11 @@ propagate_freq (basic_block head, bitmap tovisit)
|
||||
e = find_edge (bb, head);
|
||||
if (e)
|
||||
{
|
||||
/* EDGE_INFO (e)->back_edge_prob
|
||||
= ((e->probability * BLOCK_INFO (bb)->frequency)
|
||||
/ REG_BR_PROB_BASE); */
|
||||
|
||||
/* FIXME: Graphite is producing edges with no profile. Once
|
||||
this is fixed, drop this. */
|
||||
sreal tmp = e->probability.initialized_p () ?
|
||||
e->probability.to_reg_br_prob_base () : 0;
|
||||
tmp *= BLOCK_INFO (bb)->frequency;
|
||||
EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
|
||||
e->probability.to_sreal () : 0;
|
||||
EDGE_INFO (e)->back_edge_prob = tmp * BLOCK_INFO (bb)->frequency;
|
||||
}
|
||||
|
||||
/* Propagate to successor blocks. */
|
||||
@ -3396,7 +3395,7 @@ propagate_freq (basic_block head, bitmap tovisit)
|
||||
/* Estimate frequencies in loops at same nest level. */
|
||||
|
||||
static void
|
||||
estimate_loops_at_level (class loop *first_loop)
|
||||
estimate_loops_at_level (class loop *first_loop, sreal max_cyclic_prob)
|
||||
{
|
||||
class loop *loop;
|
||||
|
||||
@ -3407,7 +3406,7 @@ estimate_loops_at_level (class loop *first_loop)
|
||||
unsigned i;
|
||||
auto_bitmap tovisit;
|
||||
|
||||
estimate_loops_at_level (loop->inner);
|
||||
estimate_loops_at_level (loop->inner, max_cyclic_prob);
|
||||
|
||||
/* Find current loop back edge and mark it. */
|
||||
e = loop_latch_edge (loop);
|
||||
@ -3417,7 +3416,7 @@ estimate_loops_at_level (class loop *first_loop)
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
bitmap_set_bit (tovisit, bbs[i]->index);
|
||||
free (bbs);
|
||||
propagate_freq (loop->header, tovisit);
|
||||
propagate_freq (loop->header, tovisit, max_cyclic_prob);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3428,17 +3427,18 @@ estimate_loops (void)
|
||||
{
|
||||
auto_bitmap tovisit;
|
||||
basic_block bb;
|
||||
sreal max_cyclic_prob = (sreal)1 - (sreal)1 / param_max_predicted_iterations;
|
||||
|
||||
/* Start by estimating the frequencies in the loops. */
|
||||
if (number_of_loops (cfun) > 1)
|
||||
estimate_loops_at_level (current_loops->tree_root->inner);
|
||||
estimate_loops_at_level (current_loops->tree_root->inner, max_cyclic_prob);
|
||||
|
||||
/* Now propagate the frequencies through all the blocks. */
|
||||
FOR_ALL_BB_FN (bb, cfun)
|
||||
{
|
||||
bitmap_set_bit (tovisit, bb->index);
|
||||
}
|
||||
propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
|
||||
propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit, max_cyclic_prob);
|
||||
}
|
||||
|
||||
/* Drop the profile for NODE to guessed, and update its frequency based on
|
||||
@ -3844,21 +3844,6 @@ estimate_bb_frequencies (bool force)
|
||||
if (force || profile_status_for_fn (cfun) != PROFILE_READ
|
||||
|| !update_max_bb_count ())
|
||||
{
|
||||
static int real_values_initialized = 0;
|
||||
|
||||
if (!real_values_initialized)
|
||||
{
|
||||
real_values_initialized = 1;
|
||||
real_br_prob_base = REG_BR_PROB_BASE;
|
||||
/* Scaling frequencies up to maximal profile count may result in
|
||||
frequent overflows especially when inlining loops.
|
||||
Small scalling results in unnecesary precision loss. Stay in
|
||||
the half of the (exponential) range. */
|
||||
real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2);
|
||||
real_one_half = sreal (1, -1);
|
||||
real_inv_br_prob_base = sreal (1) / real_br_prob_base;
|
||||
real_almost_one = sreal (1) - real_inv_br_prob_base;
|
||||
}
|
||||
|
||||
mark_dfs_back_edges ();
|
||||
|
||||
@ -3879,10 +3864,10 @@ estimate_bb_frequencies (bool force)
|
||||
this is fixed, drop this. */
|
||||
if (e->probability.initialized_p ())
|
||||
EDGE_INFO (e)->back_edge_prob
|
||||
= e->probability.to_reg_br_prob_base ();
|
||||
= e->probability.to_sreal ();
|
||||
else
|
||||
EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2;
|
||||
EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
|
||||
/* back_edge_prob = 0.5 */
|
||||
EDGE_INFO (e)->back_edge_prob = sreal (1, -1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3895,14 +3880,18 @@ estimate_bb_frequencies (bool force)
|
||||
if (freq_max < BLOCK_INFO (bb)->frequency)
|
||||
freq_max = BLOCK_INFO (bb)->frequency;
|
||||
|
||||
freq_max = real_bb_freq_max / freq_max;
|
||||
/* Scaling frequencies up to maximal profile count may result in
|
||||
frequent overflows especially when inlining loops.
|
||||
Small scalling results in unnecesary precision loss. Stay in
|
||||
the half of the (exponential) range. */
|
||||
freq_max = (sreal (1) << (profile_count::n_bits / 2)) / freq_max;
|
||||
if (freq_max < 16)
|
||||
freq_max = 16;
|
||||
profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
|
||||
cfun->cfg->count_max = profile_count::uninitialized ();
|
||||
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
|
||||
{
|
||||
sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
|
||||
sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + sreal (1, -1);
|
||||
profile_count count = profile_count::from_gcov_type (tmp.to_int ());
|
||||
|
||||
/* If we have profile feedback in which this function was never
|
||||
|
@ -446,3 +446,12 @@ profile_probability::combine_with_count (profile_count count1,
|
||||
else
|
||||
return *this * even () + other * even ();
|
||||
}
|
||||
|
||||
/* Return probability as sreal in range [0, 1]. */
|
||||
|
||||
sreal
|
||||
profile_probability::to_sreal () const
|
||||
{
|
||||
gcc_checking_assert (initialized_p ());
|
||||
return ((sreal)m_val) >> (n_bits - 2);
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
|
||||
struct function;
|
||||
struct profile_count;
|
||||
class sreal;
|
||||
|
||||
/* Quality of the profile count. Because gengtype does not support enums
|
||||
inside of classes, this is in global namespace. */
|
||||
@ -614,6 +615,8 @@ public:
|
||||
profile_probability other,
|
||||
profile_count count2) const;
|
||||
|
||||
/* Return probability as sreal. */
|
||||
sreal to_sreal () const;
|
||||
/* LTO streaming support. */
|
||||
static profile_probability stream_in (class lto_input_block *);
|
||||
void stream_out (struct output_block *);
|
||||
@ -674,8 +677,6 @@ public:
|
||||
|
||||
*/
|
||||
|
||||
class sreal;
|
||||
|
||||
struct GTY(()) profile_count
|
||||
{
|
||||
public:
|
||||
|
Loading…
x
Reference in New Issue
Block a user