Make profile estimation more precise

While analyzing code size regression in SPEC2k GCC binary I noticed that we
perform some inline decisions because we think that number of executions are
very high.
In particular there was inline decision inlining gen_rtx_fmt_ee to find_reloads
believing that it is called 4 billion times.  This turned out to be cummulation
of roundoff errors in propagate_freq which was bit mechanically updated from
original sreals to C++ sreals and later to new probabilities.

This led us to estimate that a loopback edge is reached with probability 2.3
which was capped to 1-1/10000 and since this happened in nested loop it quickly
escalated to large values.

Originally capping to REG_BR_PROB_BASE avoided such problems but now we have
much higher range.

This patch avoids going from probabilites to REG_BR_PROB_BASE so precision is
kept.  In addition it makes the propagation to not estimate more than
param-max-predicted-loop-iterations.  The first change makes the cap to not
be triggered on the gcc build, but it is still better to be safe than sorry.

	* ipa-fnsummary.c (estimate_calls_size_and_time): Fix formating of
	dump.
	* params.opt: (max-predicted-iterations): Set bounds.
	* predict.c (real_almost_one, real_br_prob_base,
	real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove.
	(propagate_freq): Add max_cyclic_prob parameter; cap cyclic
	probabilities; do not truncate to reg_br_prob_bases.
	(estimate_loops_at_level): Pass max_cyclic_prob.
	(estimate_loops): Compute max_cyclic_prob.
	(estimate_bb_frequencies): Do not initialize real_*; update calculation
	of back edge prob.
	* profile-count.c (profile_probability::to_sreal): New.
	* profile-count.h (class sreal): Move up in file.
	(profile_probability::to_sreal): Declare.
This commit is contained in:
Jan Hubicka 2020-01-16 23:55:44 +01:00
parent 801f5b9677
commit f5b25e1516
6 changed files with 75 additions and 59 deletions

View File

@ -1,3 +1,20 @@
2020-01-16 Jan Hubicka <hubicka@ucw.cz>
* ipa-fnsummary.c (estimate_calls_size_and_time): Fix formating of
dump.
* params.opt: (max-predicted-iterations): Set bounds.
* predict.c (real_almost_one, real_br_prob_base,
real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove.
(propagate_freq): Add max_cyclic_prob parameter; cap cyclic
probabilities; do not truncate to reg_br_prob_bases.
(estimate_loops_at_level): Pass max_cyclic_prob.
(estimate_loops): Compute max_cyclic_prob.
(estimate_bb_frequencies): Do not initialize real_*; update calculation
of back edge prob.
* profile-count.c (profile_probability::to_sreal): New.
* profile-count.h (class sreal): Move up in file.
(profile_probability::to_sreal): Declare.
2020-01-16 Stam Markianos-Wright <stam.markianos-wright@arm.com>
* config/arm/arm.c

View File

@ -3258,7 +3258,7 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size,
gcc_assert (*size == old_size);
if (time && (*time - old_time > 1 || *time - old_time < -1)
&& dump_file)
fprintf (dump_file, "Time mismatch in call summary %f!=%f",
fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
old_time.to_double (),
time->to_double ());
}

View File

@ -555,7 +555,7 @@ Common Joined UInteger Var(param_max_pow_sqrt_depth) Init(5) IntegerRange(1, 32)
Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant.
-param=max-predicted-iterations=
Common Joined UInteger Var(param_max_predicted_iterations) Init(100) Param Optimization
Common Joined UInteger Var(param_max_predicted_iterations) Init(100) IntegerRange(1, 65536) Param Optimization
The maximum number of loop iterations we predict statically.
-param=max-reload-search-insns=

View File

@ -76,10 +76,6 @@ enum predictor_reason
static const char *reason_messages[] = {"", " (ignored)",
" (single edge duplicate)", " (edge pair duplicate)"};
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
static sreal real_almost_one, real_br_prob_base,
real_inv_br_prob_base, real_one_half, real_bb_freq_max;
static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
@ -3266,7 +3262,8 @@ public:
TOVISIT, starting in HEAD. */
static void
propagate_freq (basic_block head, bitmap tovisit)
propagate_freq (basic_block head, bitmap tovisit,
sreal max_cyclic_prob)
{
basic_block bb;
basic_block last;
@ -3322,22 +3319,14 @@ propagate_freq (basic_block head, bitmap tovisit)
FOR_EACH_EDGE (e, ei, bb->preds)
if (EDGE_INFO (e)->back_edge)
{
cyclic_probability += EDGE_INFO (e)->back_edge_prob;
}
cyclic_probability += EDGE_INFO (e)->back_edge_prob;
else if (!(e->flags & EDGE_DFS_BACK))
{
/* frequency += (e->probability
* BLOCK_INFO (e->src)->frequency /
REG_BR_PROB_BASE); */
/* FIXME: Graphite is producing edges with no profile. Once
this is fixed, drop this. */
sreal tmp = e->probability.initialized_p () ?
e->probability.to_reg_br_prob_base () : 0;
tmp *= BLOCK_INFO (e->src)->frequency;
tmp *= real_inv_br_prob_base;
frequency += tmp;
e->probability.to_sreal () : 0;
frequency += tmp * BLOCK_INFO (e->src)->frequency;
}
if (cyclic_probability == 0)
@ -3346,14 +3335,29 @@ propagate_freq (basic_block head, bitmap tovisit)
}
else
{
if (cyclic_probability > real_almost_one)
cyclic_probability = real_almost_one;
if (cyclic_probability > max_cyclic_prob)
{
if (dump_file)
fprintf (dump_file,
"cyclic probability of bb %i is %f (capped to %f)"
"; turning freq %f",
bb->index, cyclic_probability.to_double (),
max_cyclic_prob.to_double (),
frequency.to_double ());
cyclic_probability = max_cyclic_prob;
}
else if (dump_file)
fprintf (dump_file,
"cyclic probability of bb %i is %f; turning freq %f",
bb->index, cyclic_probability.to_double (),
frequency.to_double ());
/* BLOCK_INFO (bb)->frequency = frequency
/ (1 - cyclic_probability) */
cyclic_probability = sreal (1) - cyclic_probability;
BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
BLOCK_INFO (bb)->frequency = frequency
/ (sreal (1) - cyclic_probability);
if (dump_file)
fprintf (dump_file, " to %f\n",
BLOCK_INFO (bb)->frequency.to_double ());
}
}
@ -3362,16 +3366,11 @@ propagate_freq (basic_block head, bitmap tovisit)
e = find_edge (bb, head);
if (e)
{
/* EDGE_INFO (e)->back_edge_prob
= ((e->probability * BLOCK_INFO (bb)->frequency)
/ REG_BR_PROB_BASE); */
/* FIXME: Graphite is producing edges with no profile. Once
this is fixed, drop this. */
sreal tmp = e->probability.initialized_p () ?
e->probability.to_reg_br_prob_base () : 0;
tmp *= BLOCK_INFO (bb)->frequency;
EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
e->probability.to_sreal () : 0;
EDGE_INFO (e)->back_edge_prob = tmp * BLOCK_INFO (bb)->frequency;
}
/* Propagate to successor blocks. */
@ -3396,7 +3395,7 @@ propagate_freq (basic_block head, bitmap tovisit)
/* Estimate frequencies in loops at same nest level. */
static void
estimate_loops_at_level (class loop *first_loop)
estimate_loops_at_level (class loop *first_loop, sreal max_cyclic_prob)
{
class loop *loop;
@ -3407,7 +3406,7 @@ estimate_loops_at_level (class loop *first_loop)
unsigned i;
auto_bitmap tovisit;
estimate_loops_at_level (loop->inner);
estimate_loops_at_level (loop->inner, max_cyclic_prob);
/* Find current loop back edge and mark it. */
e = loop_latch_edge (loop);
@ -3417,7 +3416,7 @@ estimate_loops_at_level (class loop *first_loop)
for (i = 0; i < loop->num_nodes; i++)
bitmap_set_bit (tovisit, bbs[i]->index);
free (bbs);
propagate_freq (loop->header, tovisit);
propagate_freq (loop->header, tovisit, max_cyclic_prob);
}
}
@ -3428,17 +3427,18 @@ estimate_loops (void)
{
auto_bitmap tovisit;
basic_block bb;
sreal max_cyclic_prob = (sreal)1 - (sreal)1 / param_max_predicted_iterations;
/* Start by estimating the frequencies in the loops. */
if (number_of_loops (cfun) > 1)
estimate_loops_at_level (current_loops->tree_root->inner);
estimate_loops_at_level (current_loops->tree_root->inner, max_cyclic_prob);
/* Now propagate the frequencies through all the blocks. */
FOR_ALL_BB_FN (bb, cfun)
{
bitmap_set_bit (tovisit, bb->index);
}
propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit, max_cyclic_prob);
}
/* Drop the profile for NODE to guessed, and update its frequency based on
@ -3844,21 +3844,6 @@ estimate_bb_frequencies (bool force)
if (force || profile_status_for_fn (cfun) != PROFILE_READ
|| !update_max_bb_count ())
{
static int real_values_initialized = 0;
if (!real_values_initialized)
{
real_values_initialized = 1;
real_br_prob_base = REG_BR_PROB_BASE;
/* Scaling frequencies up to maximal profile count may result in
frequent overflows especially when inlining loops.
Small scalling results in unnecesary precision loss. Stay in
the half of the (exponential) range. */
real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2);
real_one_half = sreal (1, -1);
real_inv_br_prob_base = sreal (1) / real_br_prob_base;
real_almost_one = sreal (1) - real_inv_br_prob_base;
}
mark_dfs_back_edges ();
@ -3879,10 +3864,10 @@ estimate_bb_frequencies (bool force)
this is fixed, drop this. */
if (e->probability.initialized_p ())
EDGE_INFO (e)->back_edge_prob
= e->probability.to_reg_br_prob_base ();
= e->probability.to_sreal ();
else
EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2;
EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
/* back_edge_prob = 0.5 */
EDGE_INFO (e)->back_edge_prob = sreal (1, -1);
}
}
@ -3895,14 +3880,18 @@ estimate_bb_frequencies (bool force)
if (freq_max < BLOCK_INFO (bb)->frequency)
freq_max = BLOCK_INFO (bb)->frequency;
freq_max = real_bb_freq_max / freq_max;
/* Scaling frequencies up to maximal profile count may result in
frequent overflows especially when inlining loops.
Small scalling results in unnecesary precision loss. Stay in
the half of the (exponential) range. */
freq_max = (sreal (1) << (profile_count::n_bits / 2)) / freq_max;
if (freq_max < 16)
freq_max = 16;
profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
cfun->cfg->count_max = profile_count::uninitialized ();
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
{
sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + sreal (1, -1);
profile_count count = profile_count::from_gcov_type (tmp.to_int ());
/* If we have profile feedback in which this function was never

View File

@ -446,3 +446,12 @@ profile_probability::combine_with_count (profile_count count1,
else
return *this * even () + other * even ();
}
/* Return probability as sreal in range [0, 1]. */
sreal
profile_probability::to_sreal () const
{
gcc_checking_assert (initialized_p ());
return ((sreal)m_val) >> (n_bits - 2);
}

View File

@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
struct function;
struct profile_count;
class sreal;
/* Quality of the profile count. Because gengtype does not support enums
inside of classes, this is in global namespace. */
@ -614,6 +615,8 @@ public:
profile_probability other,
profile_count count2) const;
/* Return probability as sreal. */
sreal to_sreal () const;
/* LTO streaming support. */
static profile_probability stream_in (class lto_input_block *);
void stream_out (struct output_block *);
@ -674,8 +677,6 @@ public:
*/
class sreal;
struct GTY(()) profile_count
{
public: