diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5558cc535ba1..6cb9af49ec0e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2003-10-17  Mostafa Hagog
+
+	* common.opt: Add description of the new -fgcse-las flag.
+	* flags.h (flag_gcse_las): Declare the global flag_gcse_las.
+	* gcse.c (hash_scan_set): Handle the case of a store expression and
+	insert the memory expression into the hash table; this makes it
+	possible to discover redundant loads after stores and remove them.
+	(pre_insert_copy_insn): Move the call to update_ld_motion_stores
+	to pre_insert_copies; once stores are added to the available
+	expression hash table, this is no longer the correct place to call it.
+	(pre_insert_copies): Add a call to update_ld_motion_stores when
+	one or more copies have been inserted.
+	* opts.c (common_handle_option): Handle the -fgcse-las flag.
+	* toplev.c (flag_gcse_las): Initialize flag_gcse_las.
+
+	* doc/invoke.texi: Document the new -fgcse-las flag.
+
 2003-10-18  Alan Modra
 
 	* config/rs6000/crtsavres.asm: Correct alignment of powerpc64 code
diff --git a/gcc/common.opt b/gcc/common.opt
index 26af2806a24d..fdf28b214259 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -362,6 +362,10 @@ fgcse-sm
 Common
 Perform store motion after global common subexpression elimination
 
+fgcse-las
+Common
+Perform redundant load after store elimination in global common subexpression elimination
+
 fgnu-linker
 Common
 Output GNU ld formatted global initializers
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 474c9b711c71..fd2040034a89 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -270,8 +270,8 @@ in the following sections.
 -fdelayed-branch -fdelete-null-pointer-checks @gol
 -fexpensive-optimizations -ffast-math -ffloat-store @gol
 -fforce-addr -fforce-mem -ffunction-sections @gol
--fgcse -fgcse-lm -fgcse-sm -floop-optimize -fcrossjumping @gol
--fif-conversion -fif-conversion2 @gol
+-fgcse -fgcse-lm -fgcse-sm -fgcse-las -floop-optimize @gol
+-fcrossjumping -fif-conversion -fif-conversion2 @gol
 -finline-functions -finline-limit=@var{n} -fkeep-inline-functions @gol
 -fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
 -fmove-all-movables -fnew-ra -fno-branch-count-reg @gol
@@ -3677,10 +3677,10 @@ also turns on the following optimization flags:
 -fstrength-reduce @gol
 -fcse-follow-jumps -fcse-skip-blocks @gol
 -frerun-cse-after-loop -frerun-loop-opt @gol
--fgcse -fgcse-lm -fgcse-sm @gol
+-fgcse -fgcse-lm -fgcse-sm -fgcse-las @gol
 -fdelete-null-pointer-checks @gol
 -fexpensive-optimizations @gol
--fregmove -@gol
+-fregmove @gol
 -fschedule-insns -fschedule-insns2 @gol
 -fsched-interblock -fsched-spec @gol
 -fcaller-saves @gol
@@ -3996,10 +3996,19 @@ Enabled by default when gcse is enabled.
 
 @item -fgcse-sm
 @opindex fgcse-sm
-When @option{-fgcse-sm} is enabled, A store motion pass is run after global common
-subexpression elimination. This pass will attempt to move stores out of loops.
-When used in conjunction with @option{-fgcse-lm}, loops containing a load/store sequence
-can be changed to a load before the loop and a store after the loop.
+When @option{-fgcse-sm} is enabled, a store motion pass is run after
+global common subexpression elimination.  This pass will attempt to move
+stores out of loops.  When used in conjunction with @option{-fgcse-lm},
+loops containing a load/store sequence can be changed to a load before
+the loop and a store after the loop.
+
+Enabled by default when gcse is enabled.
+
+@item -fgcse-las
+@opindex fgcse-las
+When @option{-fgcse-las} is enabled, the global common subexpression
+elimination pass eliminates redundant loads that come after stores to the
+same memory location (both full and partial redundancies).
 
 Enabled by default when gcse is enabled.
 
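For readers of the patch, here is a minimal C illustration of the fully
redundant load-after-store case that -fgcse-las targets.  This example is
editorial, not part of the patch or the GCC testsuite, and the function
name is invented:

/* Hypothetical example.  With -fgcse-las the load in the return statement
   can be satisfied from the register already holding VAL, because the
   preceding store makes the memory value available in that register.  */
int
store_then_load (int *p, int val)
{
  *p = val;    /* store: MEM[p] gets VAL */
  return *p;   /* fully redundant load of the same location */
}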
diff --git a/gcc/flags.h b/gcc/flags.h
index 93600fbd15fd..2a1a10ff679d 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -675,6 +675,11 @@
 extern int flag_gcse_lm;
 
 extern int flag_gcse_sm;
 
+/* Nonzero if we want to perform redundant load-after-store elimination
+   in gcse.  */
+
+extern int flag_gcse_las;
+
 /* Perform branch target register optimization before prologue / epilogue
    threading.  */
diff --git a/gcc/gcse.c b/gcc/gcse.c
index 84524b37d198..3e09f02252bb 100644
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -2205,6 +2205,49 @@ hash_scan_set (rtx pat, rtx insn, struct hash_table *table)
                && oprs_available_p (pat, tmp))))
         insert_set_in_table (pat, insn, table);
     }
+  /* In the case of a store we want to consider the memory value as
+     available in the REG stored in that memory.  This makes it possible
+     to remove loads that are redundant due to stores to the same location.  */
+  else if (flag_gcse_las && GET_CODE (src) == REG && GET_CODE (dest) == MEM)
+    {
+      unsigned int regno = REGNO (src);
+
+      /* Do not do this for constant/copy propagation.  */
+      if (! table->set_p
+          /* Only record sets of pseudo-regs in the hash table.  */
+          && regno >= FIRST_PSEUDO_REGISTER
+          /* Don't GCSE something if we can't do a reg/reg copy.  */
+          && can_copy_p (GET_MODE (src))
+          /* GCSE commonly inserts instructions after the insn.  We can't
+             do that easily for EH_REGION notes so disable GCSE on these
+             for now.  */
+          && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
+          /* Is SET_DEST something we want to gcse?  */
+          && want_to_gcse_p (dest)
+          /* Don't CSE a nop.  */
+          && ! set_noop_p (pat)
+          /* Don't GCSE if it has an attached REG_EQUIV note.
+             At this point only function parameters should have
+             REG_EQUIV notes, and if the argument slot is used somewhere
+             explicitly, it means the address of the parameter has been
+             taken, so we should not extend the lifetime of the pseudo.  */
+          && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
+              || GET_CODE (XEXP (note, 0)) != MEM))
+        {
+          /* Stores are never anticipatable.  */
+          int antic_p = 0;
+          /* An expression is not available if its operands are
+             subsequently modified, including this insn.  It's also not
+             available if this is a branch, because we can't insert
+             a set after the branch.  */
+          int avail_p = oprs_available_p (dest, insn)
+                        && ! JUMP_P (insn);
+
+          /* Record the memory expression (DEST) in the hash table.  */
+          insert_expr_in_table (dest, GET_MODE (dest), insn,
+                                antic_p, avail_p, table);
+        }
+    }
 }
 
 static void
@@ -5360,7 +5403,13 @@ pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
      reaching_reg <- expr
      old_reg <- reaching_reg
    because this way copy propagation can discover additional PRE
-   opportunities.  But if this fails, we try the old way.  */
+   opportunities.  But if this fails, we try the old way.
+   When "expr" is a store, i.e. given "MEM <- old_reg", instead of
+   adding after it
+     reaching_reg <- old_reg
+   it is better to insert it before, as follows:
+     reaching_reg <- old_reg
+     MEM <- reaching_reg.  */
 
 static void
 pre_insert_copy_insn (struct expr *expr, rtx insn)
@@ -5395,22 +5444,38 @@
   else
     abort ();
 
-  old_reg = SET_DEST (set);
-
-  /* Check if we can modify the set destination in the original insn.  */
-  if (validate_change (insn, &SET_DEST (set), reg, 0))
+  if (GET_CODE (SET_DEST (set)) == REG)
     {
-      new_insn = gen_move_insn (old_reg, reg);
-      new_insn = emit_insn_after (new_insn, insn);
+      old_reg = SET_DEST (set);
+      /* Check if we can modify the set destination in the original insn.  */
+      if (validate_change (insn, &SET_DEST (set), reg, 0))
+        {
+          new_insn = gen_move_insn (old_reg, reg);
+          new_insn = emit_insn_after (new_insn, insn);
 
-      /* Keep register set table up to date.  */
-      replace_one_set (REGNO (old_reg), insn, new_insn);
-      record_one_set (regno, insn);
+          /* Keep register set table up to date.  */
+          replace_one_set (REGNO (old_reg), insn, new_insn);
+          record_one_set (regno, insn);
+        }
+      else
+        {
+          new_insn = gen_move_insn (reg, old_reg);
+          new_insn = emit_insn_after (new_insn, insn);
+
+          /* Keep register set table up to date.  */
+          record_one_set (regno, new_insn);
+        }
     }
-  else
+  else /* This is possible only in case of a store to memory.  */
     {
+      old_reg = SET_SRC (set);
       new_insn = gen_move_insn (reg, old_reg);
-      new_insn = emit_insn_after (new_insn, insn);
+
+      /* Check if we can modify the set source in the original insn.  */
+      if (validate_change (insn, &SET_SRC (set), reg, 0))
+        new_insn = emit_insn_before (new_insn, insn);
+      else
+        new_insn = emit_insn_after (new_insn, insn);
 
       /* Keep register set table up to date.  */
       record_one_set (regno, new_insn);
@@ -5423,7 +5488,6 @@
              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
               INSN_UID (insn), regno);
-  update_ld_motion_stores (expr);
 }
 
 /* Copy available expressions that reach the redundant expression
@@ -5432,7 +5496,7 @@
 static void
 pre_insert_copies (void)
 {
-  unsigned int i;
+  unsigned int i, added_copy;
   struct expr *expr;
   struct occr *occr;
   struct occr *avail;
@@ -5453,6 +5517,9 @@
          expression wasn't deleted anywhere.  */
       if (expr->reaching_reg == NULL)
         continue;
+
+      /* Set when we add a copy for that expression.  */
+      added_copy = 0;
 
       for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
         {
@@ -5477,11 +5544,16 @@
                                   BLOCK_FOR_INSN (occr->insn)))
                 continue;
 
+              added_copy = 1;
+
               /* Copy the result of avail to reaching_reg.  */
               pre_insert_copy_insn (expr, insn);
               avail->copied_p = 1;
             }
         }
+
+      if (added_copy)
+        update_ld_motion_stores (expr);
     }
 }
 
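The pre_insert_copies/pre_insert_copy_insn changes above matter for the
partially redundant case, where the store happens on only some paths.  A
sketch of that situation (editorial, not part of the patch; the function
name is invented):

/* Hypothetical example.  The load of *p after the branch is only partially
   redundant: on the path through the store, the value is already in a
   register, so PRE can insert a copy there and the load is then needed
   only on the path where no store occurred.  */
int
maybe_store_then_load (int *p, int val, int cond)
{
  if (cond)
    *p = val;   /* store makes MEM[p] available in a register on this path */
  return *p;    /* partially redundant load */
}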
diff --git a/gcc/opts.c b/gcc/opts.c
index 9da64d543bb0..8418d1a96c2e 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1019,6 +1019,10 @@ common_handle_option (size_t scode, const char *arg,
       flag_gcse_sm = value;
       break;
 
+    case OPT_fgcse_las:
+      flag_gcse_las = value;
+      break;
+
     case OPT_fgnu_linker:
       flag_gnu_linker = value;
       break;
diff --git a/gcc/toplev.c b/gcc/toplev.c
index e711135476ae..c1a05f6e26de 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -697,6 +697,11 @@
 int flag_gcse_lm = 1;
 
 int flag_gcse_sm = 1;
 
+/* Nonzero if we want to perform redundant load-after-store elimination
+   in gcse.  */
+
+int flag_gcse_las = 1;
+
 /* Perform target register optimization before prologue / epilogue
    threading.  */
@@ -1075,6 +1080,7 @@ static const lang_independent_options f_options[] =
   {"gcse", &flag_gcse, 1 },
   {"gcse-lm", &flag_gcse_lm, 1 },
   {"gcse-sm", &flag_gcse_sm, 1 },
+  {"gcse-las", &flag_gcse_las, 1 },
   {"branch-target-load-optimize", &flag_branch_target_load_optimize, 1 },
   {"branch-target-load-optimize2", &flag_branch_target_load_optimize2, 1 },
   {"loop-optimize", &flag_loop_optimize, 1 },
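A quick way to exercise the new flag on a loop that mixes loads and stores
through the same pointer.  The file name, function name, and the dump
switch below are assumptions for illustration, not part of the patch;
check the Debugging Options section of invoke.texi for the release you
actually build:

/* Hypothetical test file, las-test.c.  A possible way to inspect the
   result on a GCC of this vintage:
       gcc -O2 -fgcse-las -dG las-test.c
   which should leave an RTL dump after the GCSE pass (the exact dump
   option may vary between releases); compare it against -fno-gcse-las.  */
void
accumulate (int *sum, const int *a, int n)
{
  int i;
  for (i = 0; i < n; i++)
    *sum += a[i];   /* each iteration loads *sum right after the previous store */
}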