mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-26 01:40:55 +08:00
dse.c (find_shift_sequence): New function.
2007-09-13 Eric Christopher <echristo@apple.com> Kenneth Zadeck <zadeck@naturalbridge.com> * dse.c (find_shift_sequence): New function. (replace_read): Add case to remove read if it requires shift. * config/i386/i386.c (ix86_expand_prologue): Fixed typo in comment. Co-Authored-By: Kenneth Zadeck <zadeck@naturalbridge.com> From-SVN: r128481
This commit is contained in:
parent
46c2514e82
commit
8660aaae7a
@ -1,3 +1,10 @@
|
|||||||
|
2007-09-13 Eric Christopher <echristo@apple.com>
|
||||||
|
Kenneth Zadeck <zadeck@naturalbridge.com>
|
||||||
|
|
||||||
|
* dse.c (find_shift_sequence): New function.
|
||||||
|
(replace_read): Add case to remove read if it requires shift.
|
||||||
|
* config/i386/i386.c (ix86_expand_prologue): Fixed typo in comment.
|
||||||
|
|
||||||
2007-09-13 Tom Tromey <tromey@redhat.com>
|
2007-09-13 Tom Tromey <tromey@redhat.com>
|
||||||
|
|
||||||
* c-common.c (fname_as_string): Update.
|
* c-common.c (fname_as_string): Update.
|
||||||
|
@ -6384,7 +6384,7 @@ ix86_expand_prologue (void)
|
|||||||
insn = emit_insn (gen_set_got (pic_offset_table_rtx));
|
insn = emit_insn (gen_set_got (pic_offset_table_rtx));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Prevent function calls from be scheduled before the call to mcount.
|
/* Prevent function calls from being scheduled before the call to mcount.
|
||||||
In the pic_reg_used case, make sure that the got load isn't deleted. */
|
In the pic_reg_used case, make sure that the got load isn't deleted. */
|
||||||
if (current_function_profile)
|
if (current_function_profile)
|
||||||
{
|
{
|
||||||
|
247
gcc/dse.c
247
gcc/dse.c
@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see
|
|||||||
#include "expr.h"
|
#include "expr.h"
|
||||||
#include "recog.h"
|
#include "recog.h"
|
||||||
#include "dse.h"
|
#include "dse.h"
|
||||||
|
#include "optabs.h"
|
||||||
#include "dbgcnt.h"
|
#include "dbgcnt.h"
|
||||||
|
|
||||||
/* This file contains three techniques for performing Dead Store
|
/* This file contains three techniques for performing Dead Store
|
||||||
@ -1381,6 +1382,112 @@ dump_insn_info (const char * start, insn_info_t insn_info)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* If the modes are different and the value's source and target do not
|
||||||
|
line up, we need to extract the value from the lower part of the rhs of
|
||||||
|
the store, shift it, and then put it into a form that can be shoved
|
||||||
|
into the read_insn. This function generates a right SHIFT of a
|
||||||
|
value that is at least ACCESS_SIZE bytes wide of READ_MODE. The
|
||||||
|
shift sequence is returned or NULL if we failed to find a
|
||||||
|
shift. */
|
||||||
|
|
||||||
|
static rtx
|
||||||
|
find_shift_sequence (rtx read_reg,
|
||||||
|
int access_size,
|
||||||
|
store_info_t store_info,
|
||||||
|
read_info_t read_info,
|
||||||
|
int shift)
|
||||||
|
{
|
||||||
|
enum machine_mode store_mode = GET_MODE (store_info->mem);
|
||||||
|
enum machine_mode read_mode = GET_MODE (read_info->mem);
|
||||||
|
|
||||||
|
/* Some machines like the x86 have shift insns for each size of
|
||||||
|
operand. Other machines like the ppc or the ia-64 may only have
|
||||||
|
shift insns that shift values within 32 or 64 bit registers.
|
||||||
|
This loop tries to find the smallest shift insn that will right
|
||||||
|
justify the value we want to read but is available in one insn on
|
||||||
|
the machine. */
|
||||||
|
|
||||||
|
while (access_size < UNITS_PER_WORD)
|
||||||
|
{
|
||||||
|
rtx target;
|
||||||
|
enum machine_mode new_mode
|
||||||
|
= smallest_mode_for_size (access_size * BITS_PER_UNIT,
|
||||||
|
GET_MODE_CLASS (read_mode));
|
||||||
|
rtx new_reg = gen_reg_rtx (new_mode);
|
||||||
|
|
||||||
|
start_sequence ();
|
||||||
|
|
||||||
|
/* In theory we could also check for an ashr. Ian Taylor knows
|
||||||
|
of one dsp where the cost of these two was not the same. But
|
||||||
|
this really is a rare case anyway. */
|
||||||
|
target = expand_binop (new_mode, lshr_optab, new_reg,
|
||||||
|
GEN_INT (shift), new_reg, 1, OPTAB_DIRECT);
|
||||||
|
|
||||||
|
if (target == new_reg)
|
||||||
|
{
|
||||||
|
rtx shift_seq = get_insns ();
|
||||||
|
end_sequence ();
|
||||||
|
|
||||||
|
/* If cost is too great, set target to NULL and
|
||||||
|
let the iteration happen. */
|
||||||
|
if (shift_seq != NULL)
|
||||||
|
{
|
||||||
|
int cost = 0;
|
||||||
|
rtx insn;
|
||||||
|
|
||||||
|
for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
|
||||||
|
cost += insn_rtx_cost (insn);
|
||||||
|
|
||||||
|
/* The computation up to here is essentially independent
|
||||||
|
of the arguments and could be precomputed. It may
|
||||||
|
not be worth doing so. We could precompute if
|
||||||
|
worthwhile or at least cache the results. The result
|
||||||
|
technically depends on SHIFT, ACCESS_SIZE, and
|
||||||
|
GET_MODE_CLASS (READ_MODE). But in practice the
|
||||||
|
answer will depend only on ACCESS_SIZE. */
|
||||||
|
|
||||||
|
if (cost <= COSTS_N_INSNS (1))
|
||||||
|
{
|
||||||
|
/* We found an acceptable shift. Generate a move to
|
||||||
|
take the value from the store and put it into the
|
||||||
|
shift pseudo, then shift it, then generate another
|
||||||
|
move to put it into the target of the read. */
|
||||||
|
start_sequence ();
|
||||||
|
emit_move_insn (new_reg, gen_lowpart (new_mode, store_info->rhs));
|
||||||
|
emit_insn (shift_seq);
|
||||||
|
emit_move_insn (read_reg, gen_lowpart (read_mode, new_reg));
|
||||||
|
|
||||||
|
if (dump_file)
|
||||||
|
{
|
||||||
|
fprintf (dump_file, " -- adding extract insn r%d:%s = r%d:%s\n",
|
||||||
|
REGNO (new_reg), GET_MODE_NAME (new_mode),
|
||||||
|
REGNO (store_info->rhs), GET_MODE_NAME (store_mode));
|
||||||
|
|
||||||
|
fprintf (dump_file, " -- with shift of r%d by %d\n",
|
||||||
|
REGNO(new_reg), shift);
|
||||||
|
fprintf (dump_file, " -- and second extract insn r%d:%s = r%d:%s\n",
|
||||||
|
REGNO (read_reg), GET_MODE_NAME (read_mode),
|
||||||
|
REGNO (new_reg), GET_MODE_NAME (new_mode));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get the three insn sequence and return it. */
|
||||||
|
shift_seq = get_insns ();
|
||||||
|
end_sequence ();
|
||||||
|
return shift_seq;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
/* End the sequence. */
|
||||||
|
end_sequence ();
|
||||||
|
|
||||||
|
access_size = access_size * 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Take a sequence of:
|
/* Take a sequence of:
|
||||||
A <- r1
|
A <- r1
|
||||||
...
|
...
|
||||||
@ -1392,7 +1499,23 @@ dump_insn_info (const char * start, insn_info_t insn_info)
|
|||||||
...
|
...
|
||||||
... <- r2
|
... <- r2
|
||||||
|
|
||||||
The STORE_INFO and STORE_INFO are for the store and the READ_INFO
|
or
|
||||||
|
|
||||||
|
r3 <- extract (r1)
|
||||||
|
r3 <- r3 >> shift
|
||||||
|
r2 <- extract (r3)
|
||||||
|
... <- r2
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
r2 <- extract (r1)
|
||||||
|
... <- r2
|
||||||
|
|
||||||
|
Depending on the alignment and the mode of the store and
|
||||||
|
subsequent load.
|
||||||
|
|
||||||
|
|
||||||
|
The STORE_INFO and STORE_INSN are for the store and READ_INFO
|
||||||
and READ_INSN are for the read. Return true if the replacement
|
and READ_INSN are for the read. Return true if the replacement
|
||||||
went ok. */
|
went ok. */
|
||||||
|
|
||||||
@ -1400,52 +1523,120 @@ static bool
|
|||||||
replace_read (store_info_t store_info, insn_info_t store_insn,
|
replace_read (store_info_t store_info, insn_info_t store_insn,
|
||||||
read_info_t read_info, insn_info_t read_insn, rtx *loc)
|
read_info_t read_info, insn_info_t read_insn, rtx *loc)
|
||||||
{
|
{
|
||||||
|
enum machine_mode store_mode = GET_MODE (store_info->mem);
|
||||||
|
enum machine_mode read_mode = GET_MODE (read_info->mem);
|
||||||
|
int shift;
|
||||||
|
int access_size; /* In bytes. */
|
||||||
|
rtx read_reg = gen_reg_rtx (read_mode);
|
||||||
|
rtx shift_seq = NULL;
|
||||||
|
|
||||||
if (!dbg_cnt (dse))
|
if (!dbg_cnt (dse))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (dump_file)
|
if (GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode))
|
||||||
fprintf (dump_file, "generating move to replace load at %d from store at %d\n",
|
return false;
|
||||||
INSN_UID (read_insn->insn), INSN_UID (store_insn->insn));
|
|
||||||
if (GET_MODE (store_info->mem) == GET_MODE (read_info->mem))
|
/* To get here the read is within the boundaries of the write so
|
||||||
|
shift will never be negative. Start out with the shift being in
|
||||||
|
bytes. */
|
||||||
|
if (BYTES_BIG_ENDIAN)
|
||||||
|
shift = store_info->end - read_info->end;
|
||||||
|
else
|
||||||
|
shift = read_info->begin - store_info->begin;
|
||||||
|
|
||||||
|
access_size = shift + GET_MODE_SIZE (read_mode);
|
||||||
|
|
||||||
|
/* From now on it is bits. */
|
||||||
|
shift *= BITS_PER_UNIT;
|
||||||
|
|
||||||
|
/* We need to keep this in perspective. We are replacing a read
|
||||||
|
with a sequence of insns, but the read will almost certainly be
|
||||||
|
in cache, so it is not going to be an expensive one. Thus, we
|
||||||
|
are not willing to do a multi insn shift or worse a subroutine
|
||||||
|
call to get rid of the read. */
|
||||||
|
if (shift)
|
||||||
{
|
{
|
||||||
rtx new_reg = gen_reg_rtx (GET_MODE (store_info->mem));
|
if (access_size > UNITS_PER_WORD || FLOAT_MODE_P (store_mode))
|
||||||
if (validate_change (read_insn->insn, loc, new_reg, 0))
|
return false;
|
||||||
|
|
||||||
|
shift_seq = find_shift_sequence (read_reg, access_size, store_info,
|
||||||
|
read_info, shift);
|
||||||
|
if (!shift_seq)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dump_file)
|
||||||
|
fprintf (dump_file, "replacing load at %d from store at %d\n",
|
||||||
|
INSN_UID (read_insn->insn), INSN_UID (store_insn->insn));
|
||||||
|
|
||||||
|
if (validate_change (read_insn->insn, loc, read_reg, 0))
|
||||||
{
|
{
|
||||||
rtx insns;
|
rtx insns;
|
||||||
deferred_change_t deferred_change = pool_alloc (deferred_change_pool);
|
deferred_change_t deferred_change = pool_alloc (deferred_change_pool);
|
||||||
|
|
||||||
|
if (read_mode == store_mode)
|
||||||
|
{
|
||||||
start_sequence ();
|
start_sequence ();
|
||||||
emit_move_insn (new_reg, store_info->rhs);
|
|
||||||
|
/* The modes are the same and everything lines up. Just
|
||||||
|
generate a simple move. */
|
||||||
|
emit_move_insn (read_reg, store_info->rhs);
|
||||||
|
if (dump_file)
|
||||||
|
fprintf (dump_file, " -- adding move insn r%d = r%d\n",
|
||||||
|
REGNO (read_reg), REGNO (store_info->rhs));
|
||||||
insns = get_insns ();
|
insns = get_insns ();
|
||||||
end_sequence ();
|
end_sequence ();
|
||||||
emit_insn_before (insns, store_insn->insn);
|
}
|
||||||
|
else if (shift)
|
||||||
|
insns = shift_seq;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* The modes are different but the lsb are in the same
|
||||||
|
place, we need to extract the value in the right mode from the
|
||||||
|
rhs of the store. */
|
||||||
|
start_sequence ();
|
||||||
|
emit_move_insn (read_reg, gen_lowpart (read_mode, store_info->rhs));
|
||||||
|
|
||||||
if (dump_file)
|
if (dump_file)
|
||||||
fprintf (dump_file, " -- adding move insn %d: r%d = r%d\n",
|
fprintf (dump_file, " -- adding extract insn r%d:%s = r%d:%s\n",
|
||||||
INSN_UID (insns), REGNO (new_reg), REGNO (store_info->rhs));
|
REGNO (read_reg), GET_MODE_NAME (read_mode),
|
||||||
|
REGNO (store_info->rhs), GET_MODE_NAME (store_mode));
|
||||||
|
insns = get_insns ();
|
||||||
|
end_sequence ();
|
||||||
|
}
|
||||||
|
|
||||||
/* And now for the cludge part: cselib croaks if you just
|
/* Insert this right before the store insn where it will be safe
|
||||||
|
from later insns that might change it before the read. */
|
||||||
|
emit_insn_before (insns, store_insn->insn);
|
||||||
|
|
||||||
|
/* And now for the kludge part: cselib croaks if you just
|
||||||
return at this point. There are two reasons for this:
|
return at this point. There are two reasons for this:
|
||||||
|
|
||||||
1) Cselib has an idea of how many pseudos there are and
|
1) Cselib has an idea of how many pseudos there are and
|
||||||
that does not include the new one we just added.
|
that does not include the new ones we just added.
|
||||||
|
|
||||||
2) Cselib does not know about the move insn we added
|
2) Cselib does not know about the move insn we added
|
||||||
above the store_info, and there is no way to tell it
|
above the store_info, and there is no way to tell it
|
||||||
about it, because it has "moved on".
|
about it, because it has "moved on".
|
||||||
|
|
||||||
So we are just going to have to lie. The move insn is
|
Problem (1) is fixable with a certain amount of engineering.
|
||||||
not really an issue, cselib did not see it. But the use
|
Problem (2) requires starting the bb from scratch. This
|
||||||
of the new pseudo read_insn is a real problem. The way
|
could be expensive.
|
||||||
that we solve this problem is that we are just going to
|
|
||||||
put the mem back keep a table of mems to get rid of. At
|
So we are just going to have to lie. The move/extraction
|
||||||
the end of the basic block we can put it back. */
|
insns are not really an issue, cselib did not see them. But
|
||||||
|
the use of the new pseudo read_insn is a real problem because
|
||||||
|
cselib has not scanned this insn. The way that we solve this
|
||||||
|
problem is that we are just going to put the mem back for now
|
||||||
|
and when we are finished with the block, we undo this. We
|
||||||
|
keep a table of mems to get rid of. At the end of the basic
|
||||||
|
block we can put them back. */
|
||||||
|
|
||||||
*loc = read_info->mem;
|
*loc = read_info->mem;
|
||||||
deferred_change->next = deferred_change_list;
|
deferred_change->next = deferred_change_list;
|
||||||
deferred_change_list = deferred_change;
|
deferred_change_list = deferred_change;
|
||||||
deferred_change->loc = loc;
|
deferred_change->loc = loc;
|
||||||
deferred_change->reg = new_reg;
|
deferred_change->reg = read_reg;
|
||||||
|
|
||||||
/* Get rid of the read_info, from the point of view of the
|
/* Get rid of the read_info, from the point of view of the
|
||||||
rest of dse, play like this read never happened. */
|
rest of dse, play like this read never happened. */
|
||||||
@ -1460,21 +1651,6 @@ replace_read (store_info_t store_info, insn_info_t store_insn,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Someone with excellent rtl skills needs to fill this in. You
|
|
||||||
are guaranteed that the read is of the same size or smaller
|
|
||||||
than the store, and that the read does not hang off one of
|
|
||||||
the ends of the store. But the offsets of each must be
|
|
||||||
checked because the read does not have to line up on either
|
|
||||||
end of the store so the begin fields need to be examined in
|
|
||||||
both the store_info and read_info. */
|
|
||||||
if (dump_file)
|
|
||||||
fprintf (dump_file, " -- complex load, currently unsupported.\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* A for_each_rtx callback in which DATA is the bb_info. Check to see
|
/* A for_each_rtx callback in which DATA is the bb_info. Check to see
|
||||||
if LOC is a mem and if it is look at the address and kill any
|
if LOC is a mem and if it is look at the address and kill any
|
||||||
@ -3105,4 +3281,3 @@ struct tree_opt_pass pass_rtl_dse2 =
|
|||||||
TODO_ggc_collect, /* todo_flags_finish */
|
TODO_ggc_collect, /* todo_flags_finish */
|
||||||
'w' /* letter */
|
'w' /* letter */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user