mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-02 08:40:26 +08:00
re PR tree-optimization/64058 (Performance degradation after r216304)
PR tree-optimization/64058 * tree-ssa-coalesce.c (struct coalesce_pair): Add new field CONFLICT_COUNT. (struct ssa_conflicts): Move up earlier in the file. (conflicts_, var_map_): New static variables. (initialize_conflict_count): New function to initialize the CONFLICT_COUNT field for each conflict pair. (compare_pairs): Lazily initialize the conflict count and use it as the first tie-breaker. (sort_coalesce_list): Add new arguments conflicts, map. Initialize and wipe conflicts_ and map_ around the call to qsort. Remove special case for 2 coalesce pairs. * bitmap.c (bitmap_count_unique_bits): New function. (bitmap_count_bits_in_word): New function, extracted from bitmap_count_bits. (bitmap_count_bits): Use bitmap_count_bits_in_word. * bitmap.h (bitmap_count_unique_bits): Declare it. From-SVN: r234425
This commit is contained in:
parent
b01e88e56b
commit
478baf913e
@ -1,3 +1,23 @@
|
||||
2016-03-23 Jeff Law <law@redhat.com>
|
||||
|
||||
PR tree-optimization/64058
|
||||
* tree-ssa-coalesce.c (struct coalesce_pair): Add new field
|
||||
CONFLICT_COUNT.
|
||||
(struct ssa_conflicts): Move up earlier in the file.
|
||||
(conflicts_, var_map_): New static variables.
|
||||
(initialize_conflict_count): New function to initialize the
|
||||
CONFLICT_COUNT field for each conflict pair.
|
||||
(compare_pairs): Lazily initialize the conflict count and use it
|
||||
as the first tie-breaker.
|
||||
(sort_coalesce_list): Add new arguments conflicts, map. Initialize
|
||||
and wipe conflicts_ and map_ around the call to qsort. Remove
|
||||
special case for 2 coalesce pairs.
|
||||
* bitmap.c (bitmap_count_unique_bits): New function.
|
||||
(bitmap_count_bits_in_word): New function, extracted from
|
||||
bitmap_count_bits.
|
||||
(bitmap_count_bits): Use bitmap_count_bits_in_word.
|
||||
* bitmap.h (bitmap_count_unique_bits): Declare it.
|
||||
|
||||
2016-03-23 Ilya Enkovich <enkovich.gnu@gmail.com>
|
||||
|
||||
PR target/69917
|
||||
|
63
gcc/bitmap.c
63
gcc/bitmap.c
@ -662,6 +662,26 @@ bitmap_popcount (BITMAP_WORD a)
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Count and return the number of bits set in the bitmap word BITS. */
|
||||
static unsigned long
|
||||
bitmap_count_bits_in_word (const BITMAP_WORD *bits)
|
||||
{
|
||||
unsigned long count = 0;
|
||||
|
||||
for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
|
||||
{
|
||||
#if GCC_VERSION >= 3400
|
||||
/* Note that popcountl matches BITMAP_WORD in type, so the actual size
|
||||
of BITMAP_WORD is not material. */
|
||||
count += __builtin_popcountl (bits[ix]);
|
||||
#else
|
||||
count += bitmap_popcount (bits[ix]);
|
||||
#endif
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Count the number of bits set in the bitmap, and return it. */
|
||||
|
||||
unsigned long
|
||||
@ -669,19 +689,44 @@ bitmap_count_bits (const_bitmap a)
|
||||
{
|
||||
unsigned long count = 0;
|
||||
const bitmap_element *elt;
|
||||
unsigned ix;
|
||||
|
||||
for (elt = a->first; elt; elt = elt->next)
|
||||
count += bitmap_count_bits_in_word (elt->bits);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Count the number of unique bits set in A and B and return it. */
|
||||
|
||||
unsigned long
|
||||
bitmap_count_unique_bits (const_bitmap a, const_bitmap b)
|
||||
{
|
||||
unsigned long count = 0;
|
||||
const bitmap_element *elt_a, *elt_b;
|
||||
|
||||
for (elt_a = a->first, elt_b = b->first; elt_a && elt_b; )
|
||||
{
|
||||
for (ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
|
||||
/* If we're at different indices, then count all the bits
|
||||
in the lower element. If we're at the same index, then
|
||||
count the bits in the IOR of the two elements. */
|
||||
if (elt_a->indx < elt_b->indx)
|
||||
{
|
||||
#if GCC_VERSION >= 3400
|
||||
/* Note that popcountl matches BITMAP_WORD in type, so the actual size
|
||||
of BITMAP_WORD is not material. */
|
||||
count += __builtin_popcountl (elt->bits[ix]);
|
||||
#else
|
||||
count += bitmap_popcount (elt->bits[ix]);
|
||||
#endif
|
||||
count += bitmap_count_bits_in_word (elt_a->bits);
|
||||
elt_a = elt_a->next;
|
||||
}
|
||||
else if (elt_b->indx < elt_a->indx)
|
||||
{
|
||||
count += bitmap_count_bits_in_word (elt_b->bits);
|
||||
elt_b = elt_b->next;
|
||||
}
|
||||
else
|
||||
{
|
||||
BITMAP_WORD bits[BITMAP_ELEMENT_WORDS];
|
||||
for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
|
||||
bits[ix] = elt_a->bits[ix] | elt_b->bits[ix];
|
||||
count += bitmap_count_bits_in_word (bits);
|
||||
elt_a = elt_a->next;
|
||||
elt_b = elt_b->next;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
|
@ -280,6 +280,9 @@ extern bool bitmap_single_bit_set_p (const_bitmap);
|
||||
/* Count the number of bits set in the bitmap. */
|
||||
extern unsigned long bitmap_count_bits (const_bitmap);
|
||||
|
||||
/* Count the number of unique bits set across the two bitmaps. */
|
||||
extern unsigned long bitmap_count_unique_bits (const_bitmap, const_bitmap);
|
||||
|
||||
/* Boolean operations on bitmaps. The _into variants are two operand
|
||||
versions that modify the first source operand. The other variants
|
||||
are three operand versions that to not destroy the source bitmaps.
|
||||
|
@ -51,12 +51,40 @@ struct coalesce_pair
|
||||
int second_element;
|
||||
int cost;
|
||||
|
||||
/* A count of the number of unique partitions this pair would conflict
|
||||
with if coalescing was successful. This is the secondary sort key,
|
||||
given two pairs with equal costs, we will prefer the pair with a smaller
|
||||
conflict set.
|
||||
|
||||
This is lazily initialized when we discover two coalescing pairs have
|
||||
the same primary cost.
|
||||
|
||||
Note this is not updated and propagated as pairs are coalesced. */
|
||||
int conflict_count;
|
||||
|
||||
/* The order in which coalescing pairs are discovered is recorded in this
|
||||
field, which is used as the final tie breaker when sorting coalesce
|
||||
pairs. */
|
||||
int index;
|
||||
};
|
||||
|
||||
/* This represents a conflict graph. Implemented as an array of bitmaps.
|
||||
A full matrix is used for conflicts rather than just upper triangular form.
|
||||
this make sit much simpler and faster to perform conflict merges. */
|
||||
|
||||
struct ssa_conflicts
|
||||
{
|
||||
bitmap_obstack obstack; /* A place to allocate our bitmaps. */
|
||||
vec<bitmap> conflicts;
|
||||
};
|
||||
|
||||
/* The narrow API of the qsort comparison function doesn't allow easy
|
||||
access to additional arguments. So we have two globals (ick) to hold
|
||||
the data we need. They're initialized before the call to qsort and
|
||||
wiped immediately after. */
|
||||
static ssa_conflicts *conflicts_;
|
||||
static var_map map_;
|
||||
|
||||
/* Coalesce pair hashtable helpers. */
|
||||
|
||||
struct coalesce_pair_hasher : nofree_ptr_hash <coalesce_pair>
|
||||
@ -303,6 +331,7 @@ find_coalesce_pair (coalesce_list *cl, int p1, int p2, bool create)
|
||||
pair->second_element = p.second_element;
|
||||
pair->cost = 0;
|
||||
pair->index = num_coalesce_pairs (cl);
|
||||
pair->conflict_count = 0;
|
||||
*slot = pair;
|
||||
}
|
||||
|
||||
@ -345,21 +374,66 @@ add_coalesce (coalesce_list *cl, int p1, int p2, int value)
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute and record how many unique conflicts would exist for the
|
||||
representative partition for each coalesce pair in CL.
|
||||
|
||||
CONFLICTS is the conflict graph and MAP is the current partition view. */
|
||||
|
||||
static void
|
||||
initialize_conflict_count (coalesce_pair *p,
|
||||
ssa_conflicts *conflicts,
|
||||
var_map map)
|
||||
{
|
||||
int p1 = var_to_partition (map, ssa_name (p->first_element));
|
||||
int p2 = var_to_partition (map, ssa_name (p->second_element));
|
||||
|
||||
/* 4 cases. If both P1 and P2 have conflicts, then build their
|
||||
union and count the members. Else handle the degenerate cases
|
||||
in the obvious ways. */
|
||||
if (conflicts->conflicts[p1] && conflicts->conflicts[p2])
|
||||
p->conflict_count = bitmap_count_unique_bits (conflicts->conflicts[p1],
|
||||
conflicts->conflicts[p2]);
|
||||
else if (conflicts->conflicts[p1])
|
||||
p->conflict_count = bitmap_count_bits (conflicts->conflicts[p1]);
|
||||
else if (conflicts->conflicts[p2])
|
||||
p->conflict_count = bitmap_count_bits (conflicts->conflicts[p2]);
|
||||
else
|
||||
p->conflict_count = 0;
|
||||
}
|
||||
|
||||
|
||||
/* Comparison function to allow qsort to sort P1 and P2 in Ascending order. */
|
||||
|
||||
static int
|
||||
compare_pairs (const void *p1, const void *p2)
|
||||
{
|
||||
const coalesce_pair *const *const pp1 = (const coalesce_pair *const *) p1;
|
||||
const coalesce_pair *const *const pp2 = (const coalesce_pair *const *) p2;
|
||||
coalesce_pair *const *const pp1 = (coalesce_pair *const *) p1;
|
||||
coalesce_pair *const *const pp2 = (coalesce_pair *const *) p2;
|
||||
int result;
|
||||
|
||||
result = (* pp1)->cost - (* pp2)->cost;
|
||||
/* And if everything else is equal, then sort based on which
|
||||
coalesce pair was found first. */
|
||||
/* We use the size of the resulting conflict set as the secondary sort key.
|
||||
Given two equal costing coalesce pairs, we want to prefer the pair that
|
||||
has the smaller conflict set. */
|
||||
if (result == 0)
|
||||
result = (*pp2)->index - (*pp1)->index;
|
||||
{
|
||||
if (flag_expensive_optimizations)
|
||||
{
|
||||
/* Lazily initialize the conflict counts as it's fairly expensive
|
||||
to compute. */
|
||||
if ((*pp2)->conflict_count == 0)
|
||||
initialize_conflict_count (*pp2, conflicts_, map_);
|
||||
if ((*pp1)->conflict_count == 0)
|
||||
initialize_conflict_count (*pp1, conflicts_, map_);
|
||||
|
||||
result = (*pp2)->conflict_count - (*pp1)->conflict_count;
|
||||
}
|
||||
|
||||
/* And if everything else is equal, then sort based on which
|
||||
coalesce pair was found first. */
|
||||
if (result == 0)
|
||||
result = (*pp2)->index - (*pp1)->index;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -374,7 +448,7 @@ compare_pairs (const void *p1, const void *p2)
|
||||
in order from most important coalesce to least important. */
|
||||
|
||||
static void
|
||||
sort_coalesce_list (coalesce_list *cl)
|
||||
sort_coalesce_list (coalesce_list *cl, ssa_conflicts *conflicts, var_map map)
|
||||
{
|
||||
unsigned x, num;
|
||||
coalesce_pair *p;
|
||||
@ -400,19 +474,14 @@ sort_coalesce_list (coalesce_list *cl)
|
||||
if (num == 1)
|
||||
return;
|
||||
|
||||
/* If there are only 2, just pick swap them if the order isn't correct. */
|
||||
if (num == 2)
|
||||
{
|
||||
if (cl->sorted[0]->cost > cl->sorted[1]->cost)
|
||||
std::swap (cl->sorted[0], cl->sorted[1]);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Only call qsort if there are more than 2 items.
|
||||
??? Maybe std::sort will do better, provided that compare_pairs
|
||||
can be inlined. */
|
||||
if (num > 2)
|
||||
qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs);
|
||||
/* We don't want to depend on qsort_r, so we have to stuff away
|
||||
additional data into globals so it can be referenced in
|
||||
compare_pairs. */
|
||||
conflicts_ = conflicts;
|
||||
map_ = map;
|
||||
qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs);
|
||||
conflicts_ = NULL;
|
||||
map_ = NULL;
|
||||
}
|
||||
|
||||
|
||||
@ -437,7 +506,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
|
||||
print_generic_expr (f, var1, TDF_SLIM);
|
||||
fprintf (f, " <-> ");
|
||||
print_generic_expr (f, var2, TDF_SLIM);
|
||||
fprintf (f, " (%1d), ", node->cost);
|
||||
fprintf (f, " (%1d, %1d), ", node->cost, node->conflict_count);
|
||||
fprintf (f, "\n");
|
||||
}
|
||||
}
|
||||
@ -447,7 +516,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
|
||||
for (x = cl->num_sorted - 1 ; x >=0; x--)
|
||||
{
|
||||
node = cl->sorted[x];
|
||||
fprintf (f, "(%d) ", node->cost);
|
||||
fprintf (f, "(%d, %d) ", node->cost, node->conflict_count);
|
||||
var = ssa_name (node->first_element);
|
||||
print_generic_expr (f, var, TDF_SLIM);
|
||||
fprintf (f, " <-> ");
|
||||
@ -459,16 +528,6 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
|
||||
}
|
||||
|
||||
|
||||
/* This represents a conflict graph. Implemented as an array of bitmaps.
|
||||
A full matrix is used for conflicts rather than just upper triangular form.
|
||||
this make sit much simpler and faster to perform conflict merges. */
|
||||
|
||||
struct ssa_conflicts
|
||||
{
|
||||
bitmap_obstack obstack; /* A place to allocate our bitmaps. */
|
||||
vec<bitmap> conflicts;
|
||||
};
|
||||
|
||||
/* Return an empty new conflict graph for SIZE elements. */
|
||||
|
||||
static inline ssa_conflicts *
|
||||
@ -1800,7 +1859,7 @@ coalesce_ssa_name (void)
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
ssa_conflicts_dump (dump_file, graph);
|
||||
|
||||
sort_coalesce_list (cl);
|
||||
sort_coalesce_list (cl, graph, map);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user