mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 00:31:30 +08:00
tree-vrp.c (finalize_jump_threads): Do not care about dominance info.
* tree-vrp.c (finalize_jump_threads): Do not care about dominance info. (execute_vrp): Preserve loops through jump threading. * tree-ssa-threadupdate.c (thread_single_edge, dbds_continue_enumeration_p, determine_bb_domination_status, thread_through_loop_header): New functions. (create_edge_and_update_destination_phis, create_edge_and_update_destination_phis): Set loops for the new blocks. (prune_undesirable_thread_requests): Removed. (redirect_edges): Do not pretend that redirect_edge_and_branch can create new blocks. (thread_block): Do not call prune_undesirable_thread_requests. Update loops. (mark_threaded_blocks): Select edges to thread here. (thread_through_all_blocks): Take may_peel_loop_headers argument. Thread edges through loop headers independently. * cfgloopmanip.c (create_preheader, mfb_keep_just): Export. * tree-pass.h (TODO_mark_first_instance): New. (first_pass_instance): Declare. * cfghooks.c (duplicate_block): Put the block to the original loop if copy is not specified. * tree-ssa-dom.c (tree_ssa_dominator_optimize): Preserve loops through jump threading. Pass may_peel_loop_headers to thread_through_all_blocks according to first_pass_instance. * cfgloop.h (create_preheader): Declare. * tree-flow.h (thread_through_all_blocks): Declaration changed. * basic-block.h (mfb_keep_just, mfb_kj_edge): Declare. * passes.c (first_pass_instance): New variable. (next_pass_1): Set TODO_mark_first_instance. (execute_todo): Set first_pass_instance. * gcc.dg/tree-ssa/ssa-dom-thread-2.c: New test. * gcc.dg/vect/vect-102.c, gcc.dg/vect/vect-103.c, gcc.dg/vect/vect-104.c: Use more complex construction to prevent vectorizing. * gcc.dg/tree-ssa/pr21559.c: Update outcome. From-SVN: r124786
This commit is contained in:
parent
d2594859a6
commit
b02b9b53ec
@ -1,3 +1,35 @@
|
||||
2007-05-17 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
* tree-vrp.c (finalize_jump_threads): Do not care about dominance info.
|
||||
(execute_vrp): Preserve loops through jump threading.
|
||||
* tree-ssa-threadupdate.c (thread_single_edge,
|
||||
dbds_continue_enumeration_p, determine_bb_domination_status,
|
||||
thread_through_loop_header): New functions.
|
||||
(create_edge_and_update_destination_phis,
|
||||
create_edge_and_update_destination_phis): Set loops for the new blocks.
|
||||
(prune_undesirable_thread_requests): Removed.
|
||||
(redirect_edges): Do not pretend that redirect_edge_and_branch can
|
||||
create new blocks.
|
||||
(thread_block): Do not call prune_undesirable_thread_requests.
|
||||
Update loops.
|
||||
(mark_threaded_blocks): Select edges to thread here.
|
||||
(thread_through_all_blocks): Take may_peel_loop_headers argument.
|
||||
Thread edges through loop headers independently.
|
||||
* cfgloopmanip.c (create_preheader, mfb_keep_just): Export.
|
||||
* tree-pass.h (TODO_mark_first_instance): New.
|
||||
(first_pass_instance): Declare.
|
||||
* cfghooks.c (duplicate_block): Put the block to the original loop
|
||||
if copy is not specified.
|
||||
* tree-ssa-dom.c (tree_ssa_dominator_optimize): Preserve loops through
|
||||
jump threading. Pass may_peel_loop_headers to
|
||||
thread_through_all_blocks according to first_pass_instance.
|
||||
* cfgloop.h (create_preheader): Declare.
|
||||
* tree-flow.h (thread_through_all_blocks): Declaration changed.
|
||||
* basic-block.h (mfb_keep_just, mfb_kj_edge): Declare.
|
||||
* passes.c (first_pass_instance): New variable.
|
||||
(next_pass_1): Set TODO_mark_first_instance.
|
||||
(execute_todo): Set first_pass_instance.
|
||||
|
||||
2007-05-17 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR tree-optimization/24659
|
||||
|
@ -1173,4 +1173,8 @@ bb_has_eh_pred (basic_block bb)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* In cfgloopmanip.c. */
|
||||
extern edge mfb_kj_edge;
|
||||
bool mfb_keep_just (edge);
|
||||
|
||||
#endif /* GCC_BASIC_BLOCK_H */
|
||||
|
@ -923,9 +923,15 @@ duplicate_block (basic_block bb, edge e, basic_block after)
|
||||
set_bb_original (new_bb, bb);
|
||||
set_bb_copy (bb, new_bb);
|
||||
|
||||
/* Add the new block to the prescribed loop. */
|
||||
/* Add the new block to the copy of the loop of BB, or directly to the loop
|
||||
of BB if the loop is not being copied. */
|
||||
if (current_loops != NULL)
|
||||
add_bb_to_loop (new_bb, bb->loop_father->copy);
|
||||
{
|
||||
struct loop *cloop = bb->loop_father;
|
||||
if (cloop->copy)
|
||||
cloop = cloop->copy;
|
||||
add_bb_to_loop (new_bb, cloop);
|
||||
}
|
||||
|
||||
return new_bb;
|
||||
}
|
||||
|
@ -252,6 +252,7 @@ enum
|
||||
CP_SIMPLE_PREHEADERS = 1
|
||||
};
|
||||
|
||||
basic_block create_preheader (struct loop *, int);
|
||||
extern void create_preheaders (int);
|
||||
extern void force_single_succ_latches (void);
|
||||
|
||||
|
@ -41,7 +41,6 @@ static int find_path (edge, basic_block **);
|
||||
static void fix_loop_placements (struct loop *, bool *);
|
||||
static bool fix_bb_placement (basic_block);
|
||||
static void fix_bb_placements (basic_block, bool *);
|
||||
static basic_block create_preheader (struct loop *, int);
|
||||
static void unloop (struct loop *, bool *);
|
||||
|
||||
#define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
|
||||
@ -1085,8 +1084,8 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
|
||||
MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
|
||||
whether to redirect it. */
|
||||
|
||||
static edge mfb_kj_edge;
|
||||
static bool
|
||||
edge mfb_kj_edge;
|
||||
bool
|
||||
mfb_keep_just (edge e)
|
||||
{
|
||||
return e != mfb_kj_edge;
|
||||
@ -1097,7 +1096,7 @@ mfb_keep_just (edge e)
|
||||
entry; otherwise we also force preheader block to have only one successor.
|
||||
The function also updates dominators. */
|
||||
|
||||
static basic_block
|
||||
basic_block
|
||||
create_preheader (struct loop *loop, int flags)
|
||||
{
|
||||
edge e, fallthru;
|
||||
|
@ -105,6 +105,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
/* Global variables used to communicate with passes. */
|
||||
int dump_flags;
|
||||
bool in_gimple_form;
|
||||
bool first_pass_instance;
|
||||
|
||||
|
||||
/* This is called from various places for FUNCTION_DECL, VAR_DECL,
|
||||
@ -392,6 +393,8 @@ next_pass_1 (struct tree_opt_pass **list, struct tree_opt_pass *pass)
|
||||
memcpy (new, pass, sizeof (*new));
|
||||
new->next = NULL;
|
||||
|
||||
new->todo_flags_start &= ~TODO_mark_first_instance;
|
||||
|
||||
/* Indicate to register_dump_files that this pass has duplicates,
|
||||
and so it should rename the dump file. The first instance will
|
||||
be -1, and be number of duplicates = -static_pass_number - 1.
|
||||
@ -406,6 +409,7 @@ next_pass_1 (struct tree_opt_pass **list, struct tree_opt_pass *pass)
|
||||
}
|
||||
else
|
||||
{
|
||||
pass->todo_flags_start |= TODO_mark_first_instance;
|
||||
pass->static_pass_number = -1;
|
||||
*list = pass;
|
||||
}
|
||||
@ -932,6 +936,9 @@ execute_todo (unsigned int flags)
|
||||
gcc_assert (flags & TODO_update_ssa_any);
|
||||
#endif
|
||||
|
||||
/* Inform the pass whether it is the first time it is run. */
|
||||
first_pass_instance = (flags & TODO_mark_first_instance) != 0;
|
||||
|
||||
do_per_function (execute_function_todo, (void *)(size_t) flags);
|
||||
|
||||
/* Always remove functions just as before inlining: IPA passes might be
|
||||
|
@ -1,3 +1,10 @@
|
||||
2007-05-17 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
* gcc.dg/tree-ssa/ssa-dom-thread-2.c: New test.
|
||||
* gcc.dg/vect/vect-102.c, gcc.dg/vect/vect-103.c,
|
||||
gcc.dg/vect/vect-104.c: Use more complex construction to prevent vectorizing.
|
||||
* gcc.dg/tree-ssa/pr21559.c: Update outcome.
|
||||
|
||||
2007-05-17 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR tree-optimization/24659
|
||||
|
@ -35,11 +35,9 @@ void foo (void)
|
||||
/* { dg-final { scan-tree-dump-times "Simplified relational" 1 "vrp1" } } */
|
||||
|
||||
/* Second, we should thread the edge out of the loop via the break
|
||||
statement. */
|
||||
/* { dg-final { scan-tree-dump-times "Threaded jump" 1 "vrp1" } } */
|
||||
|
||||
/* Now if we were really good, we'd realize that the final bytes == 0
|
||||
test is totally useless. That's not likely to happen anytime soon. */
|
||||
statement. We also realize that the final bytes == 0 test is useless,
|
||||
and thread over it. */
|
||||
/* { dg-final { scan-tree-dump-times "Threaded jump" 2 "vrp1" } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vrp1" } } */
|
||||
|
||||
|
119
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2.c
Normal file
119
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2.c
Normal file
@ -0,0 +1,119 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom1-stats" } */
|
||||
|
||||
void foo();
|
||||
void bla();
|
||||
void bar();
|
||||
|
||||
/* In the following two cases, we should be able to thread edge through
|
||||
the loop header. */
|
||||
|
||||
void thread_entry_through_header (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 170; i++)
|
||||
bla ();
|
||||
}
|
||||
|
||||
void thread_latch_through_header (void)
|
||||
{
|
||||
int i = 0;
|
||||
int first = 1;
|
||||
|
||||
do
|
||||
{
|
||||
if (first)
|
||||
foo ();
|
||||
|
||||
first = 0;
|
||||
bla ();
|
||||
} while (i++ < 100);
|
||||
}
|
||||
|
||||
/* This is a TODO -- it is correct to thread both entry and latch edge through
|
||||
the header, but we do not handle this case yet. */
|
||||
|
||||
void dont_thread_1 (void)
|
||||
{
|
||||
int i = 0;
|
||||
int first = 1;
|
||||
|
||||
do
|
||||
{
|
||||
if (first)
|
||||
foo ();
|
||||
else
|
||||
bar ();
|
||||
|
||||
first = 0;
|
||||
bla ();
|
||||
} while (i++ < 100);
|
||||
}
|
||||
|
||||
/* Avoid threading in the following two cases, to prevent creating subloops. */
|
||||
|
||||
void dont_thread_2 (int first)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
if (first)
|
||||
foo ();
|
||||
else
|
||||
bar ();
|
||||
|
||||
first = 0;
|
||||
bla ();
|
||||
} while (i++ < 100);
|
||||
}
|
||||
|
||||
void dont_thread_3 (int nfirst)
|
||||
{
|
||||
int i = 0;
|
||||
int first = 0;
|
||||
|
||||
do
|
||||
{
|
||||
if (first)
|
||||
foo ();
|
||||
else
|
||||
bar ();
|
||||
|
||||
first = nfirst;
|
||||
bla ();
|
||||
} while (i++ < 100);
|
||||
}
|
||||
|
||||
/* Avoid threading in this case, in order to avoid creating loop with
|
||||
multiple entries. */
|
||||
|
||||
void dont_thread_4 (int a, int nfirst)
|
||||
{
|
||||
int i = 0;
|
||||
int first;
|
||||
|
||||
if (a)
|
||||
first = 0;
|
||||
else
|
||||
first = 1;
|
||||
|
||||
do
|
||||
{
|
||||
if (first)
|
||||
foo ();
|
||||
else
|
||||
bar ();
|
||||
|
||||
first = nfirst;
|
||||
bla ();
|
||||
} while (i++ < 100);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 1 "vrp1"} } */
|
||||
/* { dg-final { scan-tree-dump-times "Jumps threaded: 2" 0 "vrp1"} } */
|
||||
/* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 0 "dom1"} } */
|
||||
/* { dg-final { scan-tree-dump-times "Jumps threaded: 2" 1 "dom1"} } */
|
||||
/* { dg-final { cleanup-tree-dump "dom1" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vrp1" } } */
|
@ -14,6 +14,7 @@ struct extraction
|
||||
|
||||
static int a[N] = {1,2,3,4,5,6,7,8,9};
|
||||
static int b[N] = {2,3,4,5,6,7,8,9,9};
|
||||
volatile int foo;
|
||||
|
||||
int main1 (int x, int y) {
|
||||
int i;
|
||||
@ -23,7 +24,7 @@ int main1 (int x, int y) {
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
p->a[i] = a[i];
|
||||
if (x == 135)
|
||||
if (foo == 135)
|
||||
abort (); /* to avoid vectorization */
|
||||
}
|
||||
|
||||
@ -46,6 +47,7 @@ int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
foo = 0;
|
||||
return main1 (0, N);
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,7 @@ struct extraction
|
||||
static int a[N] = {1,2,3,4,5,6,7,8,9};
|
||||
static int b[N] = {17,24,7,0,2,3,4,31,82};
|
||||
static int c[N] = {9,17,24,7,0,2,3,4,31};
|
||||
volatile int foo;
|
||||
|
||||
int main1 (int x, int y) {
|
||||
int i;
|
||||
@ -25,7 +26,7 @@ int main1 (int x, int y) {
|
||||
{
|
||||
p->a[i] = a[i];
|
||||
p->b[i] = b[i];
|
||||
if (x == 135)
|
||||
if (foo == 135)
|
||||
abort (); /* to avoid vectorization */
|
||||
}
|
||||
|
||||
@ -48,6 +49,7 @@ int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
foo = 0;
|
||||
return main1 (0, N);
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,7 @@ struct extraction
|
||||
static int a[N][N] = {{1,2,3},{4,5,6},{7,8,9}};
|
||||
static int b[N][N] = {{17,24,7},{0,2,3},{4,31,82}};
|
||||
static int c[N][N] = {{1,2,3},{4,6,8},{8,9,9}};
|
||||
volatile int foo;
|
||||
|
||||
int main1 (int x) {
|
||||
int i,j;
|
||||
@ -27,7 +28,7 @@ int main1 (int x) {
|
||||
{
|
||||
p->a[i][j] = a[i][j];
|
||||
p->b[i][j] = b[i][j];
|
||||
if (x == 135)
|
||||
if (foo == 135)
|
||||
abort (); /* to avoid vectorization */
|
||||
}
|
||||
}
|
||||
@ -57,6 +58,7 @@ int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
foo = 0;
|
||||
return main1 (N);
|
||||
}
|
||||
|
||||
|
@ -1113,7 +1113,7 @@ bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode);
|
||||
unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode);
|
||||
|
||||
/* In tree-ssa-threadupdate.c. */
|
||||
extern bool thread_through_all_blocks (void);
|
||||
extern bool thread_through_all_blocks (bool);
|
||||
extern void register_jump_thread (edge, edge);
|
||||
|
||||
/* In gimplify.c */
|
||||
|
@ -218,6 +218,9 @@ struct dump_file_info
|
||||
for the passes that are handed to register_dump_files. */
|
||||
#define TODO_set_props (1 << 15)
|
||||
|
||||
/* Internally used for the first instance of a pass. */
|
||||
#define TODO_mark_first_instance (1 << 16)
|
||||
|
||||
#define TODO_update_ssa_any \
|
||||
(TODO_update_ssa \
|
||||
| TODO_update_ssa_no_phi \
|
||||
@ -417,4 +420,13 @@ extern struct tree_opt_pass *all_passes, *all_ipa_passes, *all_lowering_passes;
|
||||
extern void execute_pass_list (struct tree_opt_pass *);
|
||||
extern void execute_ipa_pass_list (struct tree_opt_pass *);
|
||||
|
||||
/* Set to true if the pass is called the first time during compilation of the
|
||||
current function. Note that using this information in the optimization
|
||||
passes is considered not to be clean, and it should be avoided if possible.
|
||||
This flag is currently used to prevent loops from being peeled repeatedly
|
||||
in jump threading; it will be removed once we preserve loop structures
|
||||
throughout the compilation -- we will be able to mark the affected loops
|
||||
directly in jump threading, and avoid peeling them next time. */
|
||||
extern bool first_pass_instance;
|
||||
|
||||
#endif /* GCC_TREE_PASS_H */
|
||||
|
@ -277,25 +277,17 @@ tree_ssa_dominator_optimize (void)
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
cfg_altered = false;
|
||||
|
||||
/* We need to know which edges exit loops so that we can
|
||||
aggressively thread through loop headers to an exit
|
||||
edge. */
|
||||
loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
|
||||
if (current_loops)
|
||||
{
|
||||
mark_loop_exit_edges ();
|
||||
loop_optimizer_finalize ();
|
||||
}
|
||||
|
||||
/* Clean up the CFG so that any forwarder blocks created by loop
|
||||
canonicalization are removed. */
|
||||
cleanup_tree_cfg ();
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
/* We need to know loop structures in order to avoid destroying them
|
||||
in jump threading. Note that we still can e.g. thread through loop
|
||||
headers to an exit edge, or through loop header to the loop body, assuming
|
||||
that we update the loop info. */
|
||||
loop_optimizer_init (LOOPS_HAVE_SIMPLE_LATCHES);
|
||||
|
||||
/* We need accurate information regarding back edges in the CFG
|
||||
for jump threading. */
|
||||
for jump threading; this may include back edes that are not part of
|
||||
a single loop. */
|
||||
mark_dfs_back_edges ();
|
||||
|
||||
|
||||
/* Recursively walk the dominator tree optimizing statements. */
|
||||
walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
|
||||
|
||||
@ -319,7 +311,7 @@ tree_ssa_dominator_optimize (void)
|
||||
free_all_edge_infos ();
|
||||
|
||||
/* Thread jumps, creating duplicate blocks as needed. */
|
||||
cfg_altered |= thread_through_all_blocks ();
|
||||
cfg_altered |= thread_through_all_blocks (first_pass_instance);
|
||||
|
||||
if (cfg_altered)
|
||||
free_dominance_info (CDI_DOMINATORS);
|
||||
@ -353,6 +345,8 @@ tree_ssa_dominator_optimize (void)
|
||||
if (dump_file && (dump_flags & TDF_STATS))
|
||||
dump_dominator_optimization_stats (dump_file);
|
||||
|
||||
loop_optimizer_finalize ();
|
||||
|
||||
/* Delete our main hashtable. */
|
||||
htab_delete (avail_exprs);
|
||||
|
||||
|
@ -315,6 +315,7 @@ create_edge_and_update_destination_phis (struct redirection_data *rd)
|
||||
|
||||
e->probability = REG_BR_PROB_BASE;
|
||||
e->count = rd->dup_block->count;
|
||||
e->aux = rd->outgoing_edge->aux;
|
||||
|
||||
/* If there are any PHI nodes at the destination of the outgoing edge
|
||||
from the duplicate block, then we will need to add a new argument
|
||||
@ -385,199 +386,6 @@ fixup_template_block (void **slot, void *data)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Not all jump threading requests are useful. In particular some
|
||||
jump threading requests can create irreducible regions which are
|
||||
undesirable.
|
||||
|
||||
This routine will examine the BB's incoming edges for jump threading
|
||||
requests which, if acted upon, would create irreducible regions. Any
|
||||
such jump threading requests found will be pruned away. */
|
||||
|
||||
static void
|
||||
prune_undesirable_thread_requests (basic_block bb)
|
||||
{
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
bool may_create_irreducible_region = false;
|
||||
unsigned int num_outgoing_edges_into_loop = 0;
|
||||
|
||||
/* For the heuristics below, we need to know if BB has more than
|
||||
one outgoing edge into a loop. */
|
||||
FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
num_outgoing_edges_into_loop += ((e->flags & EDGE_LOOP_EXIT) == 0);
|
||||
|
||||
if (num_outgoing_edges_into_loop > 1)
|
||||
{
|
||||
edge backedge = NULL;
|
||||
|
||||
/* Consider the effect of threading the edge (0, 1) to 2 on the left
|
||||
CFG to produce the right CFG:
|
||||
|
||||
|
||||
0 0
|
||||
| |
|
||||
1<--+ 2<--------+
|
||||
/ \ | | |
|
||||
2 3 | 4<----+ |
|
||||
\ / | / \ | |
|
||||
4---+ E 1-- | --+
|
||||
| | |
|
||||
E 3---+
|
||||
|
||||
|
||||
Threading the (0, 1) edge to 2 effectively creates two loops
|
||||
(2, 4, 1) and (4, 1, 3) which are neither disjoint nor nested.
|
||||
This is not good.
|
||||
|
||||
However, we do need to be able to thread (0, 1) to 2 or 3
|
||||
in the left CFG below (which creates the middle and right
|
||||
CFGs with nested loops).
|
||||
|
||||
0 0 0
|
||||
| | |
|
||||
1<--+ 2<----+ 3<-+<-+
|
||||
/| | | | | | |
|
||||
2 | | 3<-+ | 1--+ |
|
||||
\| | | | | | |
|
||||
3---+ 1--+--+ 2-----+
|
||||
|
||||
|
||||
A safe heuristic appears to be to only allow threading if BB
|
||||
has a single incoming backedge from one of its direct successors. */
|
||||
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
{
|
||||
if (e->flags & EDGE_DFS_BACK)
|
||||
{
|
||||
if (backedge)
|
||||
{
|
||||
backedge = NULL;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
backedge = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (backedge && find_edge (bb, backedge->src))
|
||||
;
|
||||
else
|
||||
may_create_irreducible_region = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
edge dest = NULL;
|
||||
|
||||
/* If we thread across the loop entry block (BB) into the
|
||||
loop and BB is still reached from outside the loop, then
|
||||
we would create an irreducible CFG. Consider the effect
|
||||
of threading the edge (1, 4) to 5 on the left CFG to produce
|
||||
the right CFG
|
||||
|
||||
0 0
|
||||
/ \ / \
|
||||
1 2 1 2
|
||||
\ / | |
|
||||
4<----+ 5<->4
|
||||
/ \ | |
|
||||
E 5---+ E
|
||||
|
||||
|
||||
Threading the (1, 4) edge to 5 creates two entry points
|
||||
into the loop (4, 5) (one from block 1, the other from
|
||||
block 2). A classic irreducible region.
|
||||
|
||||
So look at all of BB's incoming edges which are not
|
||||
backedges and which are not threaded to the loop exit.
|
||||
If that subset of incoming edges do not all thread
|
||||
to the same block, then threading any of them will create
|
||||
an irreducible region. */
|
||||
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
{
|
||||
edge e2;
|
||||
|
||||
/* We ignore back edges for now. This may need refinement
|
||||
as threading a backedge creates an inner loop which
|
||||
we would need to verify has a single entry point.
|
||||
|
||||
If all backedges thread to new locations, then this
|
||||
block will no longer have incoming backedges and we
|
||||
need not worry about creating irreducible regions
|
||||
by threading through BB. I don't think this happens
|
||||
enough in practice to worry about it. */
|
||||
if (e->flags & EDGE_DFS_BACK)
|
||||
continue;
|
||||
|
||||
/* If the incoming edge threads to the loop exit, then it
|
||||
is clearly safe. */
|
||||
e2 = e->aux;
|
||||
if (e2 && (e2->flags & EDGE_LOOP_EXIT))
|
||||
continue;
|
||||
|
||||
/* E enters the loop header and is not threaded. We can
|
||||
not allow any other incoming edges to thread into
|
||||
the loop as that would create an irreducible region. */
|
||||
if (!e2)
|
||||
{
|
||||
may_create_irreducible_region = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/* We know that this incoming edge threads to a block inside
|
||||
the loop. This edge must thread to the same target in
|
||||
the loop as any previously seen threaded edges. Otherwise
|
||||
we will create an irreducible region. */
|
||||
if (!dest)
|
||||
dest = e2;
|
||||
else if (e2 != dest)
|
||||
{
|
||||
may_create_irreducible_region = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If we might create an irreducible region, then cancel any of
|
||||
the jump threading requests for incoming edges which are
|
||||
not backedges and which do not thread to the exit block. */
|
||||
if (may_create_irreducible_region)
|
||||
{
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
{
|
||||
edge e2;
|
||||
|
||||
/* Ignore back edges. */
|
||||
if (e->flags & EDGE_DFS_BACK)
|
||||
continue;
|
||||
|
||||
e2 = e->aux;
|
||||
|
||||
/* If this incoming edge was not threaded, then there is
|
||||
nothing to do. */
|
||||
if (!e2)
|
||||
continue;
|
||||
|
||||
/* If this incoming edge threaded to the loop exit,
|
||||
then it can be ignored as it is safe. */
|
||||
if (e2->flags & EDGE_LOOP_EXIT)
|
||||
continue;
|
||||
|
||||
if (e2)
|
||||
{
|
||||
/* This edge threaded into the loop and the jump thread
|
||||
request must be cancelled. */
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, " Not threading jump %d --> %d to %d\n",
|
||||
e->src->index, e->dest->index, e2->dest->index);
|
||||
e->aux = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Hash table traversal callback to redirect each incoming edge
|
||||
associated with this hash table element to its new destination. */
|
||||
|
||||
@ -620,11 +428,8 @@ redirect_edges (void **slot, void *data)
|
||||
/* Redirect the incoming edge to the appropriate duplicate
|
||||
block. */
|
||||
e2 = redirect_edge_and_branch (e, rd->dup_block);
|
||||
gcc_assert (e == e2);
|
||||
flush_pending_stmts (e2);
|
||||
|
||||
if ((dump_file && (dump_flags & TDF_DETAILS))
|
||||
&& e->src != e2->src)
|
||||
fprintf (dump_file, " basic block %d created\n", e2->src->index);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -696,46 +501,23 @@ redirection_block_p (basic_block bb)
|
||||
successor of BB. We then revector the incoming edges into BB to
|
||||
the appropriate duplicate of BB.
|
||||
|
||||
BB and its duplicates will have assignments to the same set of
|
||||
SSA_NAMEs. Right now, we just call into update_ssa to update the
|
||||
SSA graph for those names.
|
||||
|
||||
We are also going to experiment with a true incremental update
|
||||
scheme for the duplicated resources. One of the interesting
|
||||
properties we can exploit here is that all the resources set
|
||||
in BB will have the same IDFS, so we have one IDFS computation
|
||||
per block with incoming threaded edges, which can lower the
|
||||
cost of the true incremental update algorithm. */
|
||||
If NOLOOP_ONLY is true, we only perform the threading as long as it
|
||||
does not affect the structure of the loops in a nontrivial way. */
|
||||
|
||||
static bool
|
||||
thread_block (basic_block bb)
|
||||
thread_block (basic_block bb, bool noloop_only)
|
||||
{
|
||||
/* E is an incoming edge into BB that we may or may not want to
|
||||
redirect to a duplicate of BB. */
|
||||
edge e;
|
||||
edge e, e2;
|
||||
edge_iterator ei;
|
||||
struct local_info local_info;
|
||||
|
||||
/* FOUND_BACKEDGE indicates that we found an incoming backedge
|
||||
into BB, in which case we may ignore certain jump threads
|
||||
to avoid creating irreducible regions. */
|
||||
bool found_backedge = false;
|
||||
struct loop *loop = bb->loop_father;
|
||||
|
||||
/* ALL indicates whether or not all incoming edges into BB should
|
||||
be threaded to a duplicate of BB. */
|
||||
bool all = true;
|
||||
|
||||
/* If optimizing for size, only thread this block if we don't have
|
||||
to duplicate it or it's an otherwise empty redirection block. */
|
||||
if (optimize_size
|
||||
&& EDGE_COUNT (bb->preds) > 1
|
||||
&& !redirection_block_p (bb))
|
||||
{
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
e->aux = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* To avoid scanning a linear array for the element we need we instead
|
||||
use a hash table. For normal code there should be no noticeable
|
||||
difference. However, if we have a block with a large number of
|
||||
@ -745,35 +527,45 @@ thread_block (basic_block bb)
|
||||
redirection_data_eq,
|
||||
free);
|
||||
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
found_backedge |= ((e->flags & EDGE_DFS_BACK) != 0);
|
||||
/* If we thread the latch of the loop to its exit, the loop ceases to
|
||||
exist. Make sure we do not restrict ourselves in order to preserve
|
||||
this loop. */
|
||||
if (current_loops && loop->header == bb)
|
||||
{
|
||||
e = loop_latch_edge (loop);
|
||||
e2 = e->aux;
|
||||
|
||||
/* If BB has incoming backedges, then threading across BB might
|
||||
introduce an irreducible region, which would be undesirable
|
||||
as that inhibits various optimizations later. Prune away
|
||||
any jump threading requests which we know will result in
|
||||
an irreducible region. */
|
||||
if (found_backedge)
|
||||
prune_undesirable_thread_requests (bb);
|
||||
if (e2 && loop_exit_edge_p (loop, e2))
|
||||
{
|
||||
loop->header = NULL;
|
||||
loop->latch = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Record each unique threaded destination into a hash table for
|
||||
efficient lookups. */
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
{
|
||||
if (!e->aux)
|
||||
e2 = e->aux;
|
||||
|
||||
if (!e2
|
||||
/* If NOLOOP_ONLY is true, we only allow threading through the
|
||||
header of a loop to exit edges. */
|
||||
|| (noloop_only
|
||||
&& current_loops
|
||||
&& bb == bb->loop_father->header
|
||||
&& !loop_exit_edge_p (bb->loop_father, e2)))
|
||||
{
|
||||
all = false;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
edge e2 = e->aux;
|
||||
update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
|
||||
e->count, e->aux);
|
||||
|
||||
/* Insert the outgoing edge into the hash table if it is not
|
||||
already in the hash table. */
|
||||
lookup_redirection_data (e2, e, INSERT);
|
||||
}
|
||||
update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
|
||||
e->count, e->aux);
|
||||
|
||||
/* Insert the outgoing edge into the hash table if it is not
|
||||
already in the hash table. */
|
||||
lookup_redirection_data (e2, e, INSERT);
|
||||
}
|
||||
|
||||
/* If we are going to thread all incoming edges to an outgoing edge, then
|
||||
@ -821,6 +613,339 @@ thread_block (basic_block bb)
|
||||
return local_info.jumps_threaded;
|
||||
}
|
||||
|
||||
/* Threads edge E through E->dest to the edge E->aux. Returns the copy
|
||||
of E->dest created during threading, or E->dest if it was not necessary
|
||||
to copy it (E is its single predecessor). */
|
||||
|
||||
static basic_block
|
||||
thread_single_edge (edge e)
|
||||
{
|
||||
basic_block bb = e->dest;
|
||||
edge eto = e->aux;
|
||||
struct redirection_data rd;
|
||||
struct local_info local_info;
|
||||
|
||||
e->aux = NULL;
|
||||
|
||||
thread_stats.num_threaded_edges++;
|
||||
|
||||
if (single_pred_p (bb))
|
||||
{
|
||||
/* If BB has just a single predecessor, we should only remove the
|
||||
control statements at its end, and successors except for ETO. */
|
||||
remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
|
||||
return bb;
|
||||
}
|
||||
|
||||
/* Otherwise, we need to create a copy. */
|
||||
update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
|
||||
|
||||
local_info.bb = bb;
|
||||
rd.outgoing_edge = eto;
|
||||
|
||||
create_block_for_threading (bb, &rd);
|
||||
create_edge_and_update_destination_phis (&rd);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
|
||||
e->src->index, e->dest->index, rd.dup_block->index);
|
||||
|
||||
rd.dup_block->count = e->count;
|
||||
rd.dup_block->frequency = EDGE_FREQUENCY (e);
|
||||
single_succ_edge (rd.dup_block)->count = e->count;
|
||||
redirect_edge_and_branch (e, rd.dup_block);
|
||||
flush_pending_stmts (e);
|
||||
|
||||
return rd.dup_block;
|
||||
}
|
||||
|
||||
/* Callback for dfs_enumerate_from. Returns true if BB is different
|
||||
from STOP and DBDS_CE_STOP. */
|
||||
|
||||
static basic_block dbds_ce_stop;
|
||||
static bool
|
||||
dbds_continue_enumeration_p (basic_block bb, void *stop)
|
||||
{
|
||||
return (bb != (basic_block) stop
|
||||
&& bb != dbds_ce_stop);
|
||||
}
|
||||
|
||||
/* Evaluates the dominance relationship of latch of the LOOP and BB, and
|
||||
returns the state. */
|
||||
|
||||
enum bb_dom_status
|
||||
{
|
||||
/* BB does not dominate latch of the LOOP. */
|
||||
DOMST_NONDOMINATING,
|
||||
/* The LOOP is broken (there is no path from the header to its latch. */
|
||||
DOMST_LOOP_BROKEN,
|
||||
/* BB dominates the latch of the LOOP. */
|
||||
DOMST_DOMINATING
|
||||
};
|
||||
|
||||
static enum bb_dom_status
|
||||
determine_bb_domination_status (struct loop *loop, basic_block bb)
|
||||
{
|
||||
basic_block *bblocks;
|
||||
unsigned nblocks, i;
|
||||
bool bb_reachable = false;
|
||||
edge_iterator ei;
|
||||
edge e;
|
||||
|
||||
#ifdef ENABLE_CHECKING
|
||||
/* This function assumes BB is a successor of LOOP->header. */
|
||||
{
|
||||
bool ok = false;
|
||||
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
{
|
||||
if (e->src == loop->header)
|
||||
{
|
||||
ok = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
gcc_assert (ok);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (bb == loop->latch)
|
||||
return DOMST_DOMINATING;
|
||||
|
||||
/* Check that BB dominates LOOP->latch, and that it is back-reachable
|
||||
from it. */
|
||||
|
||||
bblocks = XCNEWVEC (basic_block, loop->num_nodes);
|
||||
dbds_ce_stop = loop->header;
|
||||
nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
|
||||
bblocks, loop->num_nodes, bb);
|
||||
for (i = 0; i < nblocks; i++)
|
||||
FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
|
||||
{
|
||||
if (e->src == loop->header)
|
||||
{
|
||||
free (bblocks);
|
||||
return DOMST_NONDOMINATING;
|
||||
}
|
||||
if (e->src == bb)
|
||||
bb_reachable = true;
|
||||
}
|
||||
|
||||
free (bblocks);
|
||||
return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
|
||||
}
|
||||
|
||||
/* Thread jumps through the header of LOOP.  Returns true if cfg changes.
   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
   to the inside of the loop.

   Threading requests are recorded in the ->aux field of the header's
   incoming edges (set up by mark_threaded_blocks); on failure all of
   them are cancelled.  */

static bool
thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loop with several latch edges, or new subloops of the loop (although
     there are cases where it might be appropriate, it is difficult to decide,
     and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
	of a loop.  This aims to handle the following idiom:

	first = 1;
	while (1)
	  {
	    if (first)
	      initialize;
	    first = 0;
	    body;
	  }

	After threading the latch edge, this becomes

	first = 1;
	if (first)
	  initialize;
	while (1)
	  {
	    first = 0;
	    body;
	  }

	The original header of the loop is moved out of it, and we may thread
	the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
	the latch block of the loop.  This aims to handle the following idiom
	(normally created for "for" loops):

	i = 0;
	while (1)
	  {
	    if (i >= 100)
	      break;
	    body;
	    i++;
	  }

	This becomes

	i = 0;
	while (1)
	  {
	    body;
	    i++;
	    if (i >= 100)
	      break;
	  }
     */

  /* Threading through the header won't improve the code if the header has just
     one successor.  */
  if (single_succ_p (header))
    goto fail;

  if (latch->aux)
    {
      /* Case 1: the latch edge itself is threaded; its recorded target
	 determines the new header.  */
      tgt_edge = latch->aux;
      tgt_bb = tgt_edge->dest;
    }
  else if (!may_peel_loop_headers
	   && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      /* Case 2: only entry edges are threaded.  They must all agree on a
	 single target block, otherwise we would create a loop with multiple
	 entries.  */
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
	{
	  if (!e->aux)
	    {
	      if (e == latch)
		continue;

	      /* If latch is not threaded, and there is a header
		 edge that is not threaded, we would create loop
		 with multiple entries.  */
	      goto fail;
	    }

	  tgt_edge = e->aux;
	  atgt_bb = tgt_edge->dest;
	  if (!tgt_bb)
	    tgt_bb = atgt_bb;
	  /* Two targets of threading would make us create loop
	     with multiple entries.  */
	  else if (tgt_bb != atgt_bb)
	    goto fail;
	}

      if (!tgt_bb)
	{
	  /* There are no threading requests.  */
	  return false;
	}

      /* Redirecting to empty loop latch is useless.  */
      if (tgt_bb == loop->latch
	  && empty_block_p (loop->latch))
	goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
	 original header.  */
      loop->header = NULL;
      loop->latch = NULL;
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
	 to create a preheader for it, so that the headers of the two loops
	 do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
	{
	  tgt_bb = create_preheader (tgt_bb->loop_father, 0);
	  gcc_assert (tgt_bb != NULL);
	}
      else
	tgt_bb = split_edge (tgt_edge);
    }

  if (latch->aux)
    {
      /* First handle the case latch edge is redirected.  */
      loop->latch = thread_single_edge (latch);
      gcc_assert (single_succ (loop->latch) == tgt_bb);
      loop->header = tgt_bb;

      /* Thread the remaining edges through the former header.  */
      thread_block (header, false);
    }
  else
    {
      basic_block new_preheader;

      /* Now consider the case entry edges are redirected to the new entry
	 block.  Remember one entry edge, so that we can find the new
	 preheader (its destination after threading).  */
      FOR_EACH_EDGE (e, ei, header->preds)
	{
	  if (e->aux)
	    break;
	}

      /* The duplicate of the header is the new preheader of the loop.  Ensure
	 that it is placed correctly in the loop hierarchy.  */
      loop->copy = loop_outer (loop);

      thread_block (header, false);
      loop->copy = NULL;
      new_preheader = e->dest;

      /* Create the new latch block.  This is always necessary, as the latch
	 must have only a single successor, but the original header had at
	 least two successors.  */
      loop->latch = NULL;
      /* mfb_kj_edge is the global consulted by the mfb_keep_just callback
	 passed to make_forwarder_block below.  */
      mfb_kj_edge = single_succ_edge (new_preheader);
      loop->header = mfb_kj_edge->dest;
      latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
      loop->header = latch->dest;
      loop->latch = latch->src;
    }

  return true;

fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      e->aux = NULL;
    }
  return false;
}
|
||||
|
||||
/* Walk through the registered jump threads and convert them into a
|
||||
form convenient for this pass.
|
||||
|
||||
@ -838,6 +963,11 @@ static void
|
||||
mark_threaded_blocks (bitmap threaded_blocks)
|
||||
{
|
||||
unsigned int i;
|
||||
bitmap_iterator bi;
|
||||
bitmap tmp = BITMAP_ALLOC (NULL);
|
||||
basic_block bb;
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
|
||||
for (i = 0; i < VEC_length (edge, threaded_edges); i += 2)
|
||||
{
|
||||
@ -845,8 +975,30 @@ mark_threaded_blocks (bitmap threaded_blocks)
|
||||
edge e2 = VEC_index (edge, threaded_edges, i + 1);
|
||||
|
||||
e->aux = e2;
|
||||
bitmap_set_bit (threaded_blocks, e->dest->index);
|
||||
bitmap_set_bit (tmp, e->dest->index);
|
||||
}
|
||||
|
||||
/* If optimizing for size, only thread through block if we don't have
|
||||
to duplicate it or it's an otherwise empty redirection block. */
|
||||
if (optimize_size)
|
||||
{
|
||||
EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
|
||||
{
|
||||
bb = BASIC_BLOCK (i);
|
||||
if (EDGE_COUNT (bb->preds) > 1
|
||||
&& !redirection_block_p (bb))
|
||||
{
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
e->aux = NULL;
|
||||
}
|
||||
else
|
||||
bitmap_set_bit (threaded_blocks, i);
|
||||
}
|
||||
}
|
||||
else
|
||||
bitmap_copy (threaded_blocks, tmp);
|
||||
|
||||
BITMAP_FREE(tmp);
|
||||
}
|
||||
|
||||
|
||||
@ -856,15 +1008,20 @@ mark_threaded_blocks (bitmap threaded_blocks)
|
||||
It is the caller's responsibility to fix the dominance information
|
||||
and rewrite duplicated SSA_NAMEs back into SSA form.
|
||||
|
||||
If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
|
||||
loop headers if it does not simplify the loop.
|
||||
|
||||
Returns true if one or more edges were threaded, false otherwise. */
|
||||
|
||||
bool
|
||||
thread_through_all_blocks (void)
|
||||
thread_through_all_blocks (bool may_peel_loop_headers)
|
||||
{
|
||||
bool retval = false;
|
||||
unsigned int i;
|
||||
bitmap_iterator bi;
|
||||
bitmap threaded_blocks;
|
||||
struct loop *loop;
|
||||
loop_iterator li;
|
||||
|
||||
if (threaded_edges == NULL)
|
||||
return false;
|
||||
@ -874,14 +1031,38 @@ thread_through_all_blocks (void)
|
||||
|
||||
mark_threaded_blocks (threaded_blocks);
|
||||
|
||||
if (current_loops)
|
||||
FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
|
||||
loop->copy = NULL;
|
||||
|
||||
/* First perform the threading requests that do not affect
|
||||
loop structure. */
|
||||
EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
|
||||
{
|
||||
basic_block bb = BASIC_BLOCK (i);
|
||||
|
||||
if (EDGE_COUNT (bb->preds) > 0)
|
||||
retval |= thread_block (bb);
|
||||
retval |= thread_block (bb, true);
|
||||
}
|
||||
|
||||
/* Then perform the threading through loop headers. We start with the
|
||||
innermost loop, so that the changes in cfg we perform won't affect
|
||||
further threading. */
|
||||
if (current_loops)
|
||||
{
|
||||
FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
|
||||
{
|
||||
if (!loop->header
|
||||
|| !bitmap_bit_p (threaded_blocks, loop->header->index))
|
||||
continue;
|
||||
|
||||
retval |= thread_through_loop_header (loop, may_peel_loop_headers);
|
||||
}
|
||||
}
|
||||
|
||||
if (retval)
|
||||
free_dominance_info (CDI_DOMINATORS);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_STATS))
|
||||
fprintf (dump_file, "\nJumps threaded: %lu\n",
|
||||
thread_stats.num_threaded_edges);
|
||||
@ -890,6 +1071,7 @@ thread_through_all_blocks (void)
|
||||
threaded_blocks = NULL;
|
||||
VEC_free (edge, heap, threaded_edges);
|
||||
threaded_edges = NULL;
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -5865,13 +5865,7 @@ identify_jump_threads (void)
|
||||
static void
|
||||
finalize_jump_threads (void)
|
||||
{
|
||||
bool cfg_altered = false;
|
||||
cfg_altered = thread_through_all_blocks ();
|
||||
|
||||
/* If we threaded jumps, then we need to recompute the dominance
|
||||
information. */
|
||||
if (cfg_altered)
|
||||
free_dominance_info (CDI_DOMINATORS);
|
||||
thread_through_all_blocks (false);
|
||||
VEC_free (tree, heap, stack);
|
||||
}
|
||||
|
||||
@ -5990,22 +5984,19 @@ vrp_finalize (void)
|
||||
static unsigned int
|
||||
execute_vrp (void)
|
||||
{
|
||||
insert_range_assertions ();
|
||||
|
||||
loop_optimizer_init (LOOPS_NORMAL);
|
||||
loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
|
||||
if (current_loops)
|
||||
scev_initialize ();
|
||||
{
|
||||
rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
|
||||
scev_initialize ();
|
||||
}
|
||||
|
||||
insert_range_assertions ();
|
||||
|
||||
vrp_initialize ();
|
||||
ssa_propagate (vrp_visit_stmt, vrp_visit_phi_node);
|
||||
vrp_finalize ();
|
||||
|
||||
if (current_loops)
|
||||
{
|
||||
scev_finalize ();
|
||||
loop_optimizer_finalize ();
|
||||
}
|
||||
|
||||
/* ASSERT_EXPRs must be removed before finalizing jump threads
|
||||
as finalizing jump threads calls the CFG cleanup code which
|
||||
does not properly handle ASSERT_EXPRs. */
|
||||
@ -6019,6 +6010,12 @@ execute_vrp (void)
|
||||
update_ssa (TODO_update_ssa);
|
||||
|
||||
finalize_jump_threads ();
|
||||
if (current_loops)
|
||||
{
|
||||
scev_finalize ();
|
||||
loop_optimizer_finalize ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user