tree-vrp.c (finalize_jump_threads): Do not care about dominance info.

* tree-vrp.c (finalize_jump_threads): Do not care about dominance info.
	(execute_vrp): Preserve loops through jump threading.
	* tree-ssa-threadupdate.c (thread_single_edge,
	dbds_continue_enumeration_p, determine_bb_domination_status,
	thread_through_loop_header): New functions.
	(create_edge_and_update_destination_phis): Set loops for the new
	blocks.
	(prune_undesirable_thread_requests): Removed.
	(redirect_edges): Do not pretend that redirect_edge_and_branch can
	create new blocks.
	(thread_block): Do not call prune_undesirable_thread_requests.
	Update loops.
	(mark_threaded_blocks): Select edges to thread here.
	(thread_through_all_blocks): Take may_peel_loop_headers argument.
	Thread edges through loop headers independently.
	* cfgloopmanip.c (create_preheader, mfb_keep_just): Export.
	* tree-pass.h (TODO_mark_first_instance): New.
	(first_pass_instance): Declare.
	* cfghooks.c (duplicate_block): Put the block to the original loop
	if copy is not specified.
	* tree-ssa-dom.c (tree_ssa_dominator_optimize): Preserve loops through
	jump threading.  Pass may_peel_loop_headers to
	thread_through_all_blocks according to first_pass_instance.
	* cfgloop.h (create_preheader): Declare.
	* tree-flow.h (thread_through_all_blocks): Declaration changed.
	* basic-block.h (mfb_keep_just, mfb_kj_edge): Declare.
	* passes.c (first_pass_instance): New variable.
	(next_pass_1): Set TODO_mark_first_instance.
	(execute_todo): Set first_pass_instance.

	* gcc.dg/tree-ssa/ssa-dom-thread-2.c: New test.
	* gcc.dg/vect/vect-102.c, gcc.dg/vect/vect-103.c,
	gcc.dg/vect/vect-104.c: Use more complex construction to prevent vectorizing.
	* gcc.dg/tree-ssa/pr21559.c: Update outcome.

From-SVN: r124786
This commit is contained in:
Zdenek Dvorak 2007-05-17 10:10:24 +02:00 committed by Zdenek Dvorak
parent d2594859a6
commit b02b9b53ec
17 changed files with 660 additions and 296 deletions

View File

@ -1,3 +1,35 @@
2007-05-17 Zdenek Dvorak <dvorakz@suse.cz>
* tree-vrp.c (finalize_jump_threads): Do not care about dominance info.
(execute_vrp): Preserve loops through jump threading.
* tree-ssa-threadupdate.c (thread_single_edge,
dbds_continue_enumeration_p, determine_bb_domination_status,
thread_through_loop_header): New functions.
(create_edge_and_update_destination_phis): Set loops for the new blocks.
(prune_undesirable_thread_requests): Removed.
(redirect_edges): Do not pretend that redirect_edge_and_branch can
create new blocks.
(thread_block): Do not call prune_undesirable_thread_requests.
Update loops.
(mark_threaded_blocks): Select edges to thread here.
(thread_through_all_blocks): Take may_peel_loop_headers argument.
Thread edges through loop headers independently.
* cfgloopmanip.c (create_preheader, mfb_keep_just): Export.
* tree-pass.h (TODO_mark_first_instance): New.
(first_pass_instance): Declare.
* cfghooks.c (duplicate_block): Put the block to the original loop
if copy is not specified.
* tree-ssa-dom.c (tree_ssa_dominator_optimize): Preserve loops through
jump threading. Pass may_peel_loop_headers to
thread_through_all_blocks according to first_pass_instance.
* cfgloop.h (create_preheader): Declare.
* tree-flow.h (thread_through_all_blocks): Declaration changed.
* basic-block.h (mfb_keep_just, mfb_kj_edge): Declare.
* passes.c (first_pass_instance): New variable.
(next_pass_1): Set TODO_mark_first_instance.
(execute_todo): Set first_pass_instance.
2007-05-17 Uros Bizjak <ubizjak@gmail.com>
PR tree-optimization/24659

View File

@ -1173,4 +1173,8 @@ bb_has_eh_pred (basic_block bb)
return false;
}
/* In cfgloopmanip.c. */
extern edge mfb_kj_edge;
bool mfb_keep_just (edge);
#endif /* GCC_BASIC_BLOCK_H */

View File

@ -923,9 +923,15 @@ duplicate_block (basic_block bb, edge e, basic_block after)
set_bb_original (new_bb, bb);
set_bb_copy (bb, new_bb);
/* Add the new block to the prescribed loop. */
/* Add the new block to the copy of the loop of BB, or directly to the loop
of BB if the loop is not being copied. */
if (current_loops != NULL)
add_bb_to_loop (new_bb, bb->loop_father->copy);
{
struct loop *cloop = bb->loop_father;
if (cloop->copy)
cloop = cloop->copy;
add_bb_to_loop (new_bb, cloop);
}
return new_bb;
}

View File

@ -252,6 +252,7 @@ enum
CP_SIMPLE_PREHEADERS = 1
};
basic_block create_preheader (struct loop *, int);
extern void create_preheaders (int);
extern void force_single_succ_latches (void);

View File

@ -41,7 +41,6 @@ static int find_path (edge, basic_block **);
static void fix_loop_placements (struct loop *, bool *);
static bool fix_bb_placement (basic_block);
static void fix_bb_placements (basic_block, bool *);
static basic_block create_preheader (struct loop *, int);
static void unloop (struct loop *, bool *);
#define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
@ -1085,8 +1084,8 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
whether to redirect it. */
static edge mfb_kj_edge;
static bool
edge mfb_kj_edge;
bool
mfb_keep_just (edge e)
{
return e != mfb_kj_edge;
@ -1097,7 +1096,7 @@ mfb_keep_just (edge e)
entry; otherwise we also force preheader block to have only one successor.
The function also updates dominators. */
static basic_block
basic_block
create_preheader (struct loop *loop, int flags)
{
edge e, fallthru;

View File

@ -105,6 +105,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
/* Global variables used to communicate with passes. */
int dump_flags;
bool in_gimple_form;
bool first_pass_instance;
/* This is called from various places for FUNCTION_DECL, VAR_DECL,
@ -392,6 +393,8 @@ next_pass_1 (struct tree_opt_pass **list, struct tree_opt_pass *pass)
memcpy (new, pass, sizeof (*new));
new->next = NULL;
new->todo_flags_start &= ~TODO_mark_first_instance;
/* Indicate to register_dump_files that this pass has duplicates,
and so it should rename the dump file. The first instance will
be -1, and be number of duplicates = -static_pass_number - 1.
@ -406,6 +409,7 @@ next_pass_1 (struct tree_opt_pass **list, struct tree_opt_pass *pass)
}
else
{
pass->todo_flags_start |= TODO_mark_first_instance;
pass->static_pass_number = -1;
*list = pass;
}
@ -932,6 +936,9 @@ execute_todo (unsigned int flags)
gcc_assert (flags & TODO_update_ssa_any);
#endif
/* Inform the pass whether it is the first time it is run. */
first_pass_instance = (flags & TODO_mark_first_instance) != 0;
do_per_function (execute_function_todo, (void *)(size_t) flags);
/* Always remove functions just as before inlining: IPA passes might be

View File

@ -1,3 +1,10 @@
2007-05-17 Zdenek Dvorak <dvorakz@suse.cz>
* gcc.dg/tree-ssa/ssa-dom-thread-2.c: New test.
* gcc.dg/vect/vect-102.c, gcc.dg/vect/vect-103.c,
gcc.dg/vect/vect-104.c: Use more complex construction to prevent vectorizing.
* gcc.dg/tree-ssa/pr21559.c: Update outcome.
2007-05-17 Uros Bizjak <ubizjak@gmail.com>
PR tree-optimization/24659

View File

@ -35,11 +35,9 @@ void foo (void)
/* { dg-final { scan-tree-dump-times "Simplified relational" 1 "vrp1" } } */
/* Second, we should thread the edge out of the loop via the break
statement. */
/* { dg-final { scan-tree-dump-times "Threaded jump" 1 "vrp1" } } */
/* Now if we were really good, we'd realize that the final bytes == 0
test is totally useless. That's not likely to happen anytime soon. */
statement. We also realize that the final bytes == 0 test is useless,
and thread over it. */
/* { dg-final { scan-tree-dump-times "Threaded jump" 2 "vrp1" } } */
/* { dg-final { cleanup-tree-dump "vrp1" } } */

View File

@ -0,0 +1,119 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom1-stats" } */
void foo();
void bla();
void bar();
/* In the following two cases, we should be able to thread edge through
the loop header. */
void thread_entry_through_header (void)
{
int i;
/* I starts at 0, so the loop-entry test (i < 170) is trivially true on
the entry edge; that edge can be threaded through the header.  */
for (i = 0; i < 170; i++)
bla ();
}
void thread_latch_through_header (void)
{
int i = 0;
int first = 1;
do
{
/* FIRST is set to 0 below before the latch is reached, so on the
latch edge this test is known false and can be threaded.  */
if (first)
foo ();
first = 0;
bla ();
} while (i++ < 100);
}
/* This is a TODO -- it is correct to thread both entry and latch edge through
the header, but we do not handle this case yet. */
void dont_thread_1 (void)
{
int i = 0;
int first = 1;
/* Both the entry edge (FIRST == 1) and the latch edge (FIRST == 0) could
in principle be threaded through the header, but the pass does not
handle threading both at once yet -- see the comment above.  */
do
{
if (first)
foo ();
else
bar ();
first = 0;
bla ();
} while (i++ < 100);
}
/* Avoid threading in the following two cases, to prevent creating subloops. */
void dont_thread_2 (int first)
{
int i = 0;
/* FIRST is a parameter, so its value on the entry edge is unknown;
threading here could only be done in a way that creates a subloop,
which must be avoided.  */
do
{
if (first)
foo ();
else
bar ();
first = 0;
bla ();
} while (i++ < 100);
}
void dont_thread_3 (int nfirst)
{
int i = 0;
int first = 0;
/* FIRST becomes the unknown NFIRST after the first iteration, so the
latch edge cannot be threaded without creating a subloop.  */
do
{
if (first)
foo ();
else
bar ();
first = nfirst;
bla ();
} while (i++ < 100);
}
/* Avoid threading in this case, in order to avoid creating loop with
multiple entries. */
void dont_thread_4 (int a, int nfirst)
{
int i = 0;
int first;
if (a)
first = 0;
else
first = 1;
/* The two entry paths reach the header with different values of FIRST;
threading them to different targets would create a loop with multiple
entries, so no threading may be done here.  */
do
{
if (first)
foo ();
else
bar ();
first = nfirst;
bla ();
} while (i++ < 100);
}
/* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 1 "vrp1"} } */
/* { dg-final { scan-tree-dump-times "Jumps threaded: 2" 0 "vrp1"} } */
/* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 0 "dom1"} } */
/* { dg-final { scan-tree-dump-times "Jumps threaded: 2" 1 "dom1"} } */
/* { dg-final { cleanup-tree-dump "dom1" } } */
/* { dg-final { cleanup-tree-dump "vrp1" } } */

View File

@ -14,6 +14,7 @@ struct extraction
static int a[N] = {1,2,3,4,5,6,7,8,9};
static int b[N] = {2,3,4,5,6,7,8,9,9};
volatile int foo;
int main1 (int x, int y) {
int i;
@ -23,7 +24,7 @@ int main1 (int x, int y) {
for (i = 0; i < N; i++)
{
p->a[i] = a[i];
if (x == 135)
if (foo == 135)
abort (); /* to avoid vectorization */
}
@ -46,6 +47,7 @@ int main (void)
{
check_vect ();
foo = 0;
return main1 (0, N);
}

View File

@ -15,6 +15,7 @@ struct extraction
static int a[N] = {1,2,3,4,5,6,7,8,9};
static int b[N] = {17,24,7,0,2,3,4,31,82};
static int c[N] = {9,17,24,7,0,2,3,4,31};
volatile int foo;
int main1 (int x, int y) {
int i;
@ -25,7 +26,7 @@ int main1 (int x, int y) {
{
p->a[i] = a[i];
p->b[i] = b[i];
if (x == 135)
if (foo == 135)
abort (); /* to avoid vectorization */
}
@ -48,6 +49,7 @@ int main (void)
{
check_vect ();
foo = 0;
return main1 (0, N);
}

View File

@ -15,6 +15,7 @@ struct extraction
static int a[N][N] = {{1,2,3},{4,5,6},{7,8,9}};
static int b[N][N] = {{17,24,7},{0,2,3},{4,31,82}};
static int c[N][N] = {{1,2,3},{4,6,8},{8,9,9}};
volatile int foo;
int main1 (int x) {
int i,j;
@ -27,7 +28,7 @@ int main1 (int x) {
{
p->a[i][j] = a[i][j];
p->b[i][j] = b[i][j];
if (x == 135)
if (foo == 135)
abort (); /* to avoid vectorization */
}
}
@ -57,6 +58,7 @@ int main (void)
{
check_vect ();
foo = 0;
return main1 (N);
}

View File

@ -1113,7 +1113,7 @@ bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode);
unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode);
/* In tree-ssa-threadupdate.c. */
extern bool thread_through_all_blocks (void);
extern bool thread_through_all_blocks (bool);
extern void register_jump_thread (edge, edge);
/* In gimplify.c */

View File

@ -218,6 +218,9 @@ struct dump_file_info
for the passes that are handed to register_dump_files. */
#define TODO_set_props (1 << 15)
/* Internally used for the first instance of a pass. */
#define TODO_mark_first_instance (1 << 16)
#define TODO_update_ssa_any \
(TODO_update_ssa \
| TODO_update_ssa_no_phi \
@ -417,4 +420,13 @@ extern struct tree_opt_pass *all_passes, *all_ipa_passes, *all_lowering_passes;
extern void execute_pass_list (struct tree_opt_pass *);
extern void execute_ipa_pass_list (struct tree_opt_pass *);
/* Set to true if the pass is called the first time during compilation of the
current function. Note that using this information in the optimization
passes is considered not to be clean, and it should be avoided if possible.
This flag is currently used to prevent loops from being peeled repeatedly
in jump threading; it will be removed once we preserve loop structures
throughout the compilation -- we will be able to mark the affected loops
directly in jump threading, and avoid peeling them next time. */
extern bool first_pass_instance;
#endif /* GCC_TREE_PASS_H */

View File

@ -277,25 +277,17 @@ tree_ssa_dominator_optimize (void)
calculate_dominance_info (CDI_DOMINATORS);
cfg_altered = false;
/* We need to know which edges exit loops so that we can
aggressively thread through loop headers to an exit
edge. */
loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
if (current_loops)
{
mark_loop_exit_edges ();
loop_optimizer_finalize ();
}
/* Clean up the CFG so that any forwarder blocks created by loop
canonicalization are removed. */
cleanup_tree_cfg ();
calculate_dominance_info (CDI_DOMINATORS);
/* We need to know loop structures in order to avoid destroying them
in jump threading. Note that we still can e.g. thread through loop
headers to an exit edge, or through loop header to the loop body, assuming
that we update the loop info. */
loop_optimizer_init (LOOPS_HAVE_SIMPLE_LATCHES);
/* We need accurate information regarding back edges in the CFG
for jump threading. */
for jump threading; this may include back edges that are not part of
a single loop. */
mark_dfs_back_edges ();
/* Recursively walk the dominator tree optimizing statements. */
walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
@ -319,7 +311,7 @@ tree_ssa_dominator_optimize (void)
free_all_edge_infos ();
/* Thread jumps, creating duplicate blocks as needed. */
cfg_altered |= thread_through_all_blocks ();
cfg_altered |= thread_through_all_blocks (first_pass_instance);
if (cfg_altered)
free_dominance_info (CDI_DOMINATORS);
@ -353,6 +345,8 @@ tree_ssa_dominator_optimize (void)
if (dump_file && (dump_flags & TDF_STATS))
dump_dominator_optimization_stats (dump_file);
loop_optimizer_finalize ();
/* Delete our main hashtable. */
htab_delete (avail_exprs);

View File

@ -315,6 +315,7 @@ create_edge_and_update_destination_phis (struct redirection_data *rd)
e->probability = REG_BR_PROB_BASE;
e->count = rd->dup_block->count;
e->aux = rd->outgoing_edge->aux;
/* If there are any PHI nodes at the destination of the outgoing edge
from the duplicate block, then we will need to add a new argument
@ -385,199 +386,6 @@ fixup_template_block (void **slot, void *data)
return 1;
}
/* Not all jump threading requests are useful. In particular some
jump threading requests can create irreducible regions which are
undesirable.
This routine will examine the BB's incoming edges for jump threading
requests which, if acted upon, would create irreducible regions. Any
such jump threading requests found will be pruned away. */
static void
prune_undesirable_thread_requests (basic_block bb)
{
edge e;
edge_iterator ei;
bool may_create_irreducible_region = false;
unsigned int num_outgoing_edges_into_loop = 0;
/* For the heuristics below, we need to know if BB has more than
one outgoing edge into a loop. */
FOR_EACH_EDGE (e, ei, bb->succs)
num_outgoing_edges_into_loop += ((e->flags & EDGE_LOOP_EXIT) == 0);
if (num_outgoing_edges_into_loop > 1)
{
edge backedge = NULL;
/* Consider the effect of threading the edge (0, 1) to 2 on the left
CFG to produce the right CFG:
0 0
| |
1<--+ 2<--------+
/ \ | | |
2 3 | 4<----+ |
\ / | / \ | |
4---+ E 1-- | --+
| | |
E 3---+
Threading the (0, 1) edge to 2 effectively creates two loops
(2, 4, 1) and (4, 1, 3) which are neither disjoint nor nested.
This is not good.
However, we do need to be able to thread (0, 1) to 2 or 3
in the left CFG below (which creates the middle and right
CFGs with nested loops).
0 0 0
| | |
1<--+ 2<----+ 3<-+<-+
/| | | | | | |
2 | | 3<-+ | 1--+ |
\| | | | | | |
3---+ 1--+--+ 2-----+
A safe heuristic appears to be to only allow threading if BB
has a single incoming backedge from one of its direct successors. */
FOR_EACH_EDGE (e, ei, bb->preds)
{
if (e->flags & EDGE_DFS_BACK)
{
if (backedge)
{
backedge = NULL;
break;
}
else
{
backedge = e;
}
}
}
if (backedge && find_edge (bb, backedge->src))
;
else
may_create_irreducible_region = true;
}
else
{
edge dest = NULL;
/* If we thread across the loop entry block (BB) into the
loop and BB is still reached from outside the loop, then
we would create an irreducible CFG. Consider the effect
of threading the edge (1, 4) to 5 on the left CFG to produce
the right CFG
0 0
/ \ / \
1 2 1 2
\ / | |
4<----+ 5<->4
/ \ | |
E 5---+ E
Threading the (1, 4) edge to 5 creates two entry points
into the loop (4, 5) (one from block 1, the other from
block 2). A classic irreducible region.
So look at all of BB's incoming edges which are not
backedges and which are not threaded to the loop exit.
If that subset of incoming edges do not all thread
to the same block, then threading any of them will create
an irreducible region. */
FOR_EACH_EDGE (e, ei, bb->preds)
{
edge e2;
/* We ignore back edges for now. This may need refinement
as threading a backedge creates an inner loop which
we would need to verify has a single entry point.
If all backedges thread to new locations, then this
block will no longer have incoming backedges and we
need not worry about creating irreducible regions
by threading through BB. I don't think this happens
enough in practice to worry about it. */
if (e->flags & EDGE_DFS_BACK)
continue;
/* If the incoming edge threads to the loop exit, then it
is clearly safe. */
e2 = e->aux;
if (e2 && (e2->flags & EDGE_LOOP_EXIT))
continue;
/* E enters the loop header and is not threaded. We can
not allow any other incoming edges to thread into
the loop as that would create an irreducible region. */
if (!e2)
{
may_create_irreducible_region = true;
break;
}
/* We know that this incoming edge threads to a block inside
the loop. This edge must thread to the same target in
the loop as any previously seen threaded edges. Otherwise
we will create an irreducible region. */
if (!dest)
dest = e2;
else if (e2 != dest)
{
may_create_irreducible_region = true;
break;
}
}
}
/* If we might create an irreducible region, then cancel any of
the jump threading requests for incoming edges which are
not backedges and which do not thread to the exit block. */
if (may_create_irreducible_region)
{
FOR_EACH_EDGE (e, ei, bb->preds)
{
edge e2;
/* Ignore back edges. */
if (e->flags & EDGE_DFS_BACK)
continue;
e2 = e->aux;
/* If this incoming edge was not threaded, then there is
nothing to do. */
if (!e2)
continue;
/* If this incoming edge threaded to the loop exit,
then it can be ignored as it is safe. */
if (e2->flags & EDGE_LOOP_EXIT)
continue;
if (e2)
{
/* This edge threaded into the loop and the jump thread
request must be cancelled. */
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Not threading jump %d --> %d to %d\n",
e->src->index, e->dest->index, e2->dest->index);
e->aux = NULL;
}
}
}
}
/* Hash table traversal callback to redirect each incoming edge
associated with this hash table element to its new destination. */
@ -620,11 +428,8 @@ redirect_edges (void **slot, void *data)
/* Redirect the incoming edge to the appropriate duplicate
block. */
e2 = redirect_edge_and_branch (e, rd->dup_block);
gcc_assert (e == e2);
flush_pending_stmts (e2);
if ((dump_file && (dump_flags & TDF_DETAILS))
&& e->src != e2->src)
fprintf (dump_file, " basic block %d created\n", e2->src->index);
}
else
{
@ -696,46 +501,23 @@ redirection_block_p (basic_block bb)
successor of BB. We then revector the incoming edges into BB to
the appropriate duplicate of BB.
BB and its duplicates will have assignments to the same set of
SSA_NAMEs. Right now, we just call into update_ssa to update the
SSA graph for those names.
We are also going to experiment with a true incremental update
scheme for the duplicated resources. One of the interesting
properties we can exploit here is that all the resources set
in BB will have the same IDFS, so we have one IDFS computation
per block with incoming threaded edges, which can lower the
cost of the true incremental update algorithm. */
If NOLOOP_ONLY is true, we only perform the threading as long as it
does not affect the structure of the loops in a nontrivial way. */
static bool
thread_block (basic_block bb)
thread_block (basic_block bb, bool noloop_only)
{
/* E is an incoming edge into BB that we may or may not want to
redirect to a duplicate of BB. */
edge e;
edge e, e2;
edge_iterator ei;
struct local_info local_info;
/* FOUND_BACKEDGE indicates that we found an incoming backedge
into BB, in which case we may ignore certain jump threads
to avoid creating irreducible regions. */
bool found_backedge = false;
struct loop *loop = bb->loop_father;
/* ALL indicates whether or not all incoming edges into BB should
be threaded to a duplicate of BB. */
bool all = true;
/* If optimizing for size, only thread this block if we don't have
to duplicate it or it's an otherwise empty redirection block. */
if (optimize_size
&& EDGE_COUNT (bb->preds) > 1
&& !redirection_block_p (bb))
{
FOR_EACH_EDGE (e, ei, bb->preds)
e->aux = NULL;
return false;
}
/* To avoid scanning a linear array for the element we need we instead
use a hash table. For normal code there should be no noticeable
difference. However, if we have a block with a large number of
@ -745,35 +527,45 @@ thread_block (basic_block bb)
redirection_data_eq,
free);
FOR_EACH_EDGE (e, ei, bb->preds)
found_backedge |= ((e->flags & EDGE_DFS_BACK) != 0);
/* If we thread the latch of the loop to its exit, the loop ceases to
exist. Make sure we do not restrict ourselves in order to preserve
this loop. */
if (current_loops && loop->header == bb)
{
e = loop_latch_edge (loop);
e2 = e->aux;
/* If BB has incoming backedges, then threading across BB might
introduce an irreducible region, which would be undesirable
as that inhibits various optimizations later. Prune away
any jump threading requests which we know will result in
an irreducible region. */
if (found_backedge)
prune_undesirable_thread_requests (bb);
if (e2 && loop_exit_edge_p (loop, e2))
{
loop->header = NULL;
loop->latch = NULL;
}
}
/* Record each unique threaded destination into a hash table for
efficient lookups. */
FOR_EACH_EDGE (e, ei, bb->preds)
{
if (!e->aux)
e2 = e->aux;
if (!e2
/* If NOLOOP_ONLY is true, we only allow threading through the
header of a loop to exit edges. */
|| (noloop_only
&& current_loops
&& bb == bb->loop_father->header
&& !loop_exit_edge_p (bb->loop_father, e2)))
{
all = false;
continue;
}
else
{
edge e2 = e->aux;
update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
e->count, e->aux);
/* Insert the outgoing edge into the hash table if it is not
already in the hash table. */
lookup_redirection_data (e2, e, INSERT);
}
update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
e->count, e->aux);
/* Insert the outgoing edge into the hash table if it is not
already in the hash table. */
lookup_redirection_data (e2, e, INSERT);
}
/* If we are going to thread all incoming edges to an outgoing edge, then
@ -821,6 +613,339 @@ thread_block (basic_block bb)
return local_info.jumps_threaded;
}
/* Threads edge E through E->dest to the edge E->aux. Returns the copy
of E->dest created during threading, or E->dest if it was not necessary
to copy it (E is its single predecessor). */
static basic_block
thread_single_edge (edge e)
{
basic_block bb = e->dest;
edge eto = e->aux;
struct redirection_data rd;
struct local_info local_info;
/* Consume the threading request stored on E.  */
e->aux = NULL;
thread_stats.num_threaded_edges++;
if (single_pred_p (bb))
{
/* If BB has just a single predecessor, we should only remove the
control statements at its end, and successors except for ETO. */
remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
return bb;
}
/* Otherwise, we need to create a copy. */
update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
/* NOTE(review): LOCAL_INFO is assigned but never read afterwards in this
function -- appears vestigial; confirm before removing.  */
local_info.bb = bb;
rd.outgoing_edge = eto;
create_block_for_threading (bb, &rd);
create_edge_and_update_destination_phis (&rd);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
e->src->index, e->dest->index, rd.dup_block->index);
/* Give the duplicate the count/frequency of the threaded edge, and
redirect E to it.  */
rd.dup_block->count = e->count;
rd.dup_block->frequency = EDGE_FREQUENCY (e);
single_succ_edge (rd.dup_block)->count = e->count;
redirect_edge_and_branch (e, rd.dup_block);
flush_pending_stmts (e);
return rd.dup_block;
}
/* Callback for dfs_enumerate_from. Returns true if BB is different
from STOP and DBDS_CE_STOP. */
/* Second stop block for the enumeration; passed through a file-scope
variable because dfs_enumerate_from only forwards one extra argument
(STOP) to the predicate.  */
static basic_block dbds_ce_stop;
static bool
dbds_continue_enumeration_p (basic_block bb, void *stop)
{
return (bb != (basic_block) stop
&& bb != dbds_ce_stop);
}
/* Evaluates the dominance relationship of latch of the LOOP and BB, and
returns the state. */
enum bb_dom_status
{
/* BB does not dominate latch of the LOOP. */
DOMST_NONDOMINATING,
/* The LOOP is broken (there is no path from the header to its latch). */
DOMST_LOOP_BROKEN,
/* BB dominates the latch of the LOOP. */
DOMST_DOMINATING
};
/* Returns the bb_dom_status describing whether BB (a successor of the
header of LOOP) dominates the latch of LOOP.  */
static enum bb_dom_status
determine_bb_domination_status (struct loop *loop, basic_block bb)
{
basic_block *bblocks;
unsigned nblocks, i;
bool bb_reachable = false;
edge_iterator ei;
edge e;
#ifdef ENABLE_CHECKING
/* This function assumes BB is a successor of LOOP->header. */
{
bool ok = false;
FOR_EACH_EDGE (e, ei, bb->preds)
{
if (e->src == loop->header)
{
ok = true;
break;
}
}
gcc_assert (ok);
}
#endif
if (bb == loop->latch)
return DOMST_DOMINATING;
/* Check that BB dominates LOOP->latch, and that it is back-reachable
from it. */
/* Walk backwards from the latch, stopping at BB and (via DBDS_CE_STOP)
at the loop header.  If the header is reachable without passing
through BB, then BB does not dominate the latch.  */
bblocks = XCNEWVEC (basic_block, loop->num_nodes);
dbds_ce_stop = loop->header;
nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
bblocks, loop->num_nodes, bb);
for (i = 0; i < nblocks; i++)
FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
{
if (e->src == loop->header)
{
free (bblocks);
return DOMST_NONDOMINATING;
}
if (e->src == bb)
bb_reachable = true;
}
free (bblocks);
/* If BB was never reached from the latch, there is no path from header
to latch at all -- the loop is broken.  */
return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
}
/* Thread jumps through the header of LOOP. Returns true if cfg changes.
If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
to the inside of the loop. */
static bool
thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
{
basic_block header = loop->header;
edge e, tgt_edge, latch = loop_latch_edge (loop);
edge_iterator ei;
basic_block tgt_bb, atgt_bb;
enum bb_dom_status domst;
/* We have already threaded through headers to exits, so all the threading
requests now are to the inside of the loop. We need to avoid creating
irreducible regions (i.e., loops with more than one entry block), and
also loops with several latch edges, or new subloops of the loop (although
there are cases where it might be appropriate, it is difficult to decide,
and doing it wrongly may confuse other optimizers).
We could handle more general cases here. However, the intention is to
preserve some information about the loop, which is impossible if its
structure changes significantly, in a way that is not well understood.
Thus we only handle few important special cases, in which also updating
of the loop-carried information should be feasible:
1) Propagation of latch edge to a block that dominates the latch block
of a loop. This aims to handle the following idiom:
first = 1;
while (1)
{
if (first)
initialize;
first = 0;
body;
}
After threading the latch edge, this becomes
first = 1;
if (first)
initialize;
while (1)
{
first = 0;
body;
}
The original header of the loop is moved out of it, and we may thread
the remaining edges through it without further constraints.
2) All entry edges are propagated to a single basic block that dominates
the latch block of the loop. This aims to handle the following idiom
(normally created for "for" loops):
i = 0;
while (1)
{
if (i >= 100)
break;
body;
i++;
}
This becomes
i = 0;
while (1)
{
body;
i++;
if (i >= 100)
break;
}
*/
/* Threading through the header won't improve the code if the header has just
one successor. */
if (single_succ_p (header))
goto fail;
if (latch->aux)
{
/* Case 1: the latch edge has a threading request; its destination is
the candidate new header.  */
tgt_edge = latch->aux;
tgt_bb = tgt_edge->dest;
}
else if (!may_peel_loop_headers
&& !redirection_block_p (loop->header))
goto fail;
else
{
/* Case 2: check that all entry edges thread to one common target.  */
tgt_bb = NULL;
tgt_edge = NULL;
FOR_EACH_EDGE (e, ei, header->preds)
{
if (!e->aux)
{
if (e == latch)
continue;
/* If latch is not threaded, and there is a header
edge that is not threaded, we would create loop
with multiple entries. */
goto fail;
}
tgt_edge = e->aux;
atgt_bb = tgt_edge->dest;
if (!tgt_bb)
tgt_bb = atgt_bb;
/* Two targets of threading would make us create loop
with multiple entries. */
else if (tgt_bb != atgt_bb)
goto fail;
}
if (!tgt_bb)
{
/* There are no threading requests. */
return false;
}
/* Redirecting to empty loop latch is useless. */
if (tgt_bb == loop->latch
&& empty_block_p (loop->latch))
goto fail;
}
/* The target block must dominate the loop latch, otherwise we would be
creating a subloop. */
domst = determine_bb_domination_status (loop, tgt_bb);
if (domst == DOMST_NONDOMINATING)
goto fail;
if (domst == DOMST_LOOP_BROKEN)
{
/* If the loop ceased to exist, mark it as such, and thread through its
original header. */
loop->header = NULL;
loop->latch = NULL;
return thread_block (header, false);
}
if (tgt_bb->loop_father->header == tgt_bb)
{
/* If the target of the threading is a header of a subloop, we need
to create a preheader for it, so that the headers of the two loops
do not merge. */
if (EDGE_COUNT (tgt_bb->preds) > 2)
{
tgt_bb = create_preheader (tgt_bb->loop_father, 0);
gcc_assert (tgt_bb != NULL);
}
else
tgt_bb = split_edge (tgt_edge);
}
if (latch->aux)
{
/* First handle the case latch edge is redirected. */
loop->latch = thread_single_edge (latch);
gcc_assert (single_succ (loop->latch) == tgt_bb);
loop->header = tgt_bb;
/* Thread the remaining edges through the former header. */
thread_block (header, false);
}
else
{
basic_block new_preheader;
/* Now consider the case entry edges are redirected to the new entry
block. Remember one entry edge, so that we can find the new
preheader (its destination after threading). */
FOR_EACH_EDGE (e, ei, header->preds)
{
if (e->aux)
break;
}
/* The duplicate of the header is the new preheader of the loop. Ensure
that it is placed correctly in the loop hierarchy. */
loop->copy = loop_outer (loop);
thread_block (header, false);
loop->copy = NULL;
new_preheader = e->dest;
/* Create the new latch block. This is always necessary, as the latch
must have only a single successor, but the original header had at
least two successors. */
loop->latch = NULL;
mfb_kj_edge = single_succ_edge (new_preheader);
loop->header = mfb_kj_edge->dest;
latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
loop->header = latch->dest;
loop->latch = latch->src;
}
return true;
fail:
/* We failed to thread anything. Cancel the requests. */
FOR_EACH_EDGE (e, ei, header->preds)
{
e->aux = NULL;
}
return false;
}
/* Walk through the registered jump threads and convert them into a
form convenient for this pass.
@ -838,6 +963,11 @@ static void
mark_threaded_blocks (bitmap threaded_blocks)
{
unsigned int i;
bitmap_iterator bi;
bitmap tmp = BITMAP_ALLOC (NULL);
basic_block bb;
edge e;
edge_iterator ei;
for (i = 0; i < VEC_length (edge, threaded_edges); i += 2)
{
@ -845,8 +975,30 @@ mark_threaded_blocks (bitmap threaded_blocks)
edge e2 = VEC_index (edge, threaded_edges, i + 1);
e->aux = e2;
bitmap_set_bit (threaded_blocks, e->dest->index);
bitmap_set_bit (tmp, e->dest->index);
}
/* If optimizing for size, only thread through block if we don't have
to duplicate it or it's an otherwise empty redirection block. */
if (optimize_size)
{
EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
{
bb = BASIC_BLOCK (i);
if (EDGE_COUNT (bb->preds) > 1
&& !redirection_block_p (bb))
{
FOR_EACH_EDGE (e, ei, bb->preds)
e->aux = NULL;
}
else
bitmap_set_bit (threaded_blocks, i);
}
}
else
bitmap_copy (threaded_blocks, tmp);
BITMAP_FREE(tmp);
}
@ -856,15 +1008,20 @@ mark_threaded_blocks (bitmap threaded_blocks)
It is the caller's responsibility to fix the dominance information
and rewrite duplicated SSA_NAMEs back into SSA form.
If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
loop headers if it does not simplify the loop.
Returns true if one or more edges were threaded, false otherwise. */
bool
thread_through_all_blocks (void)
thread_through_all_blocks (bool may_peel_loop_headers)
{
bool retval = false;
unsigned int i;
bitmap_iterator bi;
bitmap threaded_blocks;
struct loop *loop;
loop_iterator li;
if (threaded_edges == NULL)
return false;
@ -874,14 +1031,38 @@ thread_through_all_blocks (void)
mark_threaded_blocks (threaded_blocks);
if (current_loops)
FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
loop->copy = NULL;
/* First perform the threading requests that do not affect
loop structure. */
EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
{
basic_block bb = BASIC_BLOCK (i);
if (EDGE_COUNT (bb->preds) > 0)
retval |= thread_block (bb);
retval |= thread_block (bb, true);
}
/* Then perform the threading through loop headers. We start with the
innermost loop, so that the changes in cfg we perform won't affect
further threading. */
if (current_loops)
{
FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
{
if (!loop->header
|| !bitmap_bit_p (threaded_blocks, loop->header->index))
continue;
retval |= thread_through_loop_header (loop, may_peel_loop_headers);
}
}
if (retval)
free_dominance_info (CDI_DOMINATORS);
if (dump_file && (dump_flags & TDF_STATS))
fprintf (dump_file, "\nJumps threaded: %lu\n",
thread_stats.num_threaded_edges);
@ -890,6 +1071,7 @@ thread_through_all_blocks (void)
threaded_blocks = NULL;
VEC_free (edge, heap, threaded_edges);
threaded_edges = NULL;
return retval;
}

View File

@ -5865,13 +5865,7 @@ identify_jump_threads (void)
static void
finalize_jump_threads (void)
{
bool cfg_altered = false;
cfg_altered = thread_through_all_blocks ();
/* If we threaded jumps, then we need to recompute the dominance
information. */
if (cfg_altered)
free_dominance_info (CDI_DOMINATORS);
thread_through_all_blocks (false);
VEC_free (tree, heap, stack);
}
@ -5990,22 +5984,19 @@ vrp_finalize (void)
static unsigned int
execute_vrp (void)
{
insert_range_assertions ();
loop_optimizer_init (LOOPS_NORMAL);
loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
if (current_loops)
scev_initialize ();
{
rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
scev_initialize ();
}
insert_range_assertions ();
vrp_initialize ();
ssa_propagate (vrp_visit_stmt, vrp_visit_phi_node);
vrp_finalize ();
if (current_loops)
{
scev_finalize ();
loop_optimizer_finalize ();
}
/* ASSERT_EXPRs must be removed before finalizing jump threads
as finalizing jump threads calls the CFG cleanup code which
does not properly handle ASSERT_EXPRs. */
@ -6019,6 +6010,12 @@ execute_vrp (void)
update_ssa (TODO_update_ssa);
finalize_jump_threads ();
if (current_loops)
{
scev_finalize ();
loop_optimizer_finalize ();
}
return 0;
}