mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-07 14:37:25 +08:00
Fix for PR1654 - implement "movstrsi" pattern to copy simple blocks of memory.
From-SVN: r22467
This commit is contained in:
parent
feaefdd522
commit
d2a73f8ee2
@ -1,3 +1,22 @@
|
||||
Fri Sep 18 09:44:55 1998 Nick Clifton <nickc@cygnus.com>
|
||||
|
||||
* config/m32r/m32r.h (m32r_block_immediate_operand): Add to
|
||||
PREDICATE_CODES.
|
||||
|
||||
* config/m32r/m32r.md: Add "movstrsi" and "movstrsi_internal"
|
||||
patterns.
|
||||
|
||||
* config/m32r/m32r.c (m32r_print_operand): Add 's' and 'p'
|
||||
operators.
|
||||
(block_move_call): New function: Call a library routine to copy a
|
||||
block of memory.
|
||||
(m32r_expand_block_move): New function: Expand a "movstrsi"
|
||||
pattern into a sequence of insns.
|
||||
(m32r_output_block_move): New function: Expand a
|
||||
"movstrsi_internal" pattern into a sequence of assembler opcodes.
|
||||
(m32r_block_immediate_operand): New function: Return true if the
|
||||
RTL is an integer constant, less than or equal to MAX_MOVE_BYTES.
|
||||
|
||||
Thu Sep 17 16:42:16 EDT 1998 Andrew MacLeod <amacleod@cygnus.com>
|
||||
|
||||
* except.c (start_catch_handler): Issue 'fatal' instead of 'error' and
|
||||
|
@ -1783,6 +1783,22 @@ m32r_print_operand (file, x, code)
|
||||
|
||||
switch (code)
|
||||
{
|
||||
/* The 's' and 'p' codes are used by output_block_move() to
|
||||
indicate post-increment 's'tores and 'p're-increment loads. */
|
||||
case 's':
|
||||
if (GET_CODE (x) == REG)
|
||||
fprintf (file, "@+%s", reg_names [REGNO (x)]);
|
||||
else
|
||||
output_operand_lossage ("invalid operand to %s code");
|
||||
return;
|
||||
|
||||
case 'p':
|
||||
if (GET_CODE (x) == REG)
|
||||
fprintf (file, "@%s+", reg_names [REGNO (x)]);
|
||||
else
|
||||
output_operand_lossage ("invalid operand to %p code");
|
||||
return;
|
||||
|
||||
case 'R' :
|
||||
/* Write second word of DImode or DFmode reference,
|
||||
register or memory. */
|
||||
@ -1822,7 +1838,7 @@ m32r_print_operand (file, x, code)
|
||||
rtx first, second;
|
||||
|
||||
split_double (x, &first, &second);
|
||||
fprintf (file, "0x%08lx",
|
||||
fprintf (file, "0x%08x",
|
||||
code == 'L' ? INTVAL (first) : INTVAL (second));
|
||||
}
|
||||
else
|
||||
@ -2209,3 +2225,250 @@ emit_cond_move (operands, insn)
|
||||
return buffer;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Use a library function to move some bytes. */
|
||||
static void
|
||||
block_move_call (dest_reg, src_reg, bytes_rtx)
|
||||
rtx dest_reg;
|
||||
rtx src_reg;
|
||||
rtx bytes_rtx;
|
||||
{
|
||||
/* We want to pass the size as Pmode, which will normally be SImode
|
||||
but will be DImode if we are using 64 bit longs and pointers. */
|
||||
if (GET_MODE (bytes_rtx) != VOIDmode
|
||||
&& GET_MODE (bytes_rtx) != Pmode)
|
||||
bytes_rtx = convert_to_mode (Pmode, bytes_rtx, 1);
|
||||
|
||||
#ifdef TARGET_MEM_FUNCTIONS
|
||||
emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "memcpy"), 0,
|
||||
VOIDmode, 3, dest_reg, Pmode, src_reg, Pmode,
|
||||
convert_to_mode (TYPE_MODE (sizetype), bytes_rtx,
|
||||
TREE_UNSIGNED (sizetype)),
|
||||
TYPE_MODE (sizetype));
|
||||
#else
|
||||
emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "bcopy"), 0,
|
||||
VOIDmode, 3, src_reg, Pmode, dest_reg, Pmode,
|
||||
convert_to_mode (TYPE_MODE (integer_type_node), bytes_rtx,
|
||||
TREE_UNSIGNED (integer_type_node)),
|
||||
TYPE_MODE (integer_type_node));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* The maximum number of bytes to copy using pairs of load/store instructions.
|
||||
If a block is larger than this then a loop will be generated to copy
|
||||
MAX_MOVE_BYTES chunks at a time. The value of 32 is a semi-arbitary choice.
|
||||
A customer uses Dhrystome as their benchmark, and Dhrystone has a 31 byte
|
||||
string copy in it. */
|
||||
#define MAX_MOVE_BYTES 32
|
||||
|
||||
/* Expand string/block move operations.
|
||||
|
||||
operands[0] is the pointer to the destination.
|
||||
operands[1] is the pointer to the source.
|
||||
operands[2] is the number of bytes to move.
|
||||
operands[3] is the alignment. */
|
||||
|
||||
void
|
||||
m32r_expand_block_move (operands)
|
||||
rtx operands[];
|
||||
{
|
||||
rtx orig_dst = operands[0];
|
||||
rtx orig_src = operands[1];
|
||||
rtx bytes_rtx = operands[2];
|
||||
rtx align_rtx = operands[3];
|
||||
int constp = GET_CODE (bytes_rtx) == CONST_INT;
|
||||
HOST_WIDE_INT bytes = constp ? INTVAL (bytes_rtx) : 0;
|
||||
int align = INTVAL (align_rtx);
|
||||
int leftover;
|
||||
rtx src_reg;
|
||||
rtx dst_reg;
|
||||
|
||||
if (constp && bytes <= 0)
|
||||
return;
|
||||
|
||||
/* Move the address into scratch registers. */
|
||||
dst_reg = copy_addr_to_reg (XEXP (orig_dst, 0));
|
||||
src_reg = copy_addr_to_reg (XEXP (orig_src, 0));
|
||||
|
||||
if (align > UNITS_PER_WORD)
|
||||
align = UNITS_PER_WORD;
|
||||
|
||||
/* If we prefer size over speed, always use a function call.
|
||||
If we do not know the size, use a function call.
|
||||
If the blocks are not word aligned, use a function call. */
|
||||
if (optimize_size || ! constp || align != UNITS_PER_WORD)
|
||||
{
|
||||
block_move_call (dst_reg, src_reg, bytes_rtx);
|
||||
return;
|
||||
}
|
||||
|
||||
leftover = bytes % MAX_MOVE_BYTES;
|
||||
bytes -= leftover;
|
||||
|
||||
/* If necessary, generate a loop to handle the bulk of the copy. */
|
||||
if (bytes)
|
||||
{
|
||||
rtx label;
|
||||
rtx final_src;
|
||||
|
||||
bytes_rtx = GEN_INT (MAX_MOVE_BYTES);
|
||||
|
||||
/* If we are going to have to perform this loop more than
|
||||
once, then generate a label and compute the address the
|
||||
source register will contain upon completion of the final
|
||||
itteration. */
|
||||
if (bytes > MAX_MOVE_BYTES)
|
||||
{
|
||||
final_src = gen_reg_rtx (Pmode);
|
||||
|
||||
if (INT16_P(bytes))
|
||||
emit_insn (gen_addsi3 (final_src, src_reg, bytes_rtx));
|
||||
else
|
||||
{
|
||||
emit_insn (gen_movsi (final_src, bytes_rtx));
|
||||
emit_insn (gen_addsi3 (final_src, final_src, src_reg));
|
||||
}
|
||||
|
||||
label = gen_label_rtx ();
|
||||
emit_label (label);
|
||||
}
|
||||
|
||||
/* It is known that output_block_move() will update src_reg to point
|
||||
to the word after the end of the source block, and dst_reg to point
|
||||
to the last word of the destination block, provided that the block
|
||||
is MAX_MOVE_BYTES long. */
|
||||
emit_insn (gen_movstrsi_internal (dst_reg, src_reg, bytes_rtx));
|
||||
emit_insn (gen_addsi3 (dst_reg, dst_reg, GEN_INT (4)));
|
||||
|
||||
if (bytes > MAX_MOVE_BYTES)
|
||||
{
|
||||
emit_insn (gen_cmpsi (src_reg, final_src));
|
||||
emit_jump_insn (gen_bne (label));
|
||||
}
|
||||
}
|
||||
|
||||
if (leftover)
|
||||
emit_insn (gen_movstrsi_internal (dst_reg, src_reg, GEN_INT (leftover)));
|
||||
}
|
||||
|
||||
|
||||
/* Emit load/stores for a small constant word aligned block_move.
|
||||
|
||||
operands[0] is the memory address of the destination.
|
||||
operands[1] is the memory address of the source.
|
||||
operands[2] is the number of bytes to move.
|
||||
operands[3] is a temp register.
|
||||
operands[4] is a temp register. */
|
||||
|
||||
char *
|
||||
m32r_output_block_move (insn, operands)
|
||||
rtx insn;
|
||||
rtx operands[];
|
||||
{
|
||||
HOST_WIDE_INT bytes = INTVAL (operands[2]);
|
||||
int first_time;
|
||||
int got_extra = 0;
|
||||
|
||||
if (bytes < 1 || bytes > MAX_MOVE_BYTES)
|
||||
abort ();
|
||||
|
||||
/* We do not have a post-increment store available, so the first set of
|
||||
stores are done without any increment, then the remaining ones can use
|
||||
the pre-increment addressing mode.
|
||||
|
||||
Note: expand_block_move() also relies upon this behaviour when building
|
||||
loops to copy large blocks. */
|
||||
first_time = 1;
|
||||
|
||||
while (bytes > 0)
|
||||
{
|
||||
if (bytes >= 8)
|
||||
{
|
||||
if (first_time)
|
||||
{
|
||||
output_asm_insn ("ld\t%3, %p1", operands);
|
||||
output_asm_insn ("ld\t%4, %p1", operands);
|
||||
output_asm_insn ("st\t%3, @%0", operands);
|
||||
output_asm_insn ("st\t%4, %s0", operands);
|
||||
}
|
||||
else
|
||||
{
|
||||
output_asm_insn ("ld\t%3, %p1", operands);
|
||||
output_asm_insn ("ld\t%4, %p1", operands);
|
||||
output_asm_insn ("st\t%3, %s0", operands);
|
||||
output_asm_insn ("st\t%4, %s0", operands);
|
||||
}
|
||||
|
||||
bytes -= 8;
|
||||
}
|
||||
else if (bytes >= 4)
|
||||
{
|
||||
if (bytes > 4)
|
||||
got_extra = 1;
|
||||
|
||||
output_asm_insn ("ld\t%3, %p1", operands);
|
||||
|
||||
if (got_extra)
|
||||
output_asm_insn ("ld\t%4, %p1", operands);
|
||||
|
||||
if (first_time)
|
||||
output_asm_insn ("st\t%3, @%0", operands);
|
||||
else
|
||||
output_asm_insn ("st\t%3, %s0", operands);
|
||||
|
||||
bytes -= 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Get the entire next word, even though we do not want all of it.
|
||||
The saves us from doing several smaller loads, and we assume that
|
||||
we cannot cause a page fault when at least part of the word is in
|
||||
valid memory. If got_extra is true then we have already loaded
|
||||
the next word as part of loading and storing the previous word. */
|
||||
if (! got_extra)
|
||||
output_asm_insn ("ld\t%4, @%1", operands);
|
||||
|
||||
if (bytes >= 2)
|
||||
{
|
||||
bytes -= 2;
|
||||
|
||||
output_asm_insn ("sth\t%4, @%0", operands);
|
||||
|
||||
/* If there is a byte left to store then increment the
|
||||
destination address and shift the contents of the source
|
||||
register down by 16 bits. We could not do the address
|
||||
increment in the store half word instruction, because it does
|
||||
not have an auto increment mode. */
|
||||
if (bytes > 0) /* assert (bytes == 1) */
|
||||
{
|
||||
output_asm_insn ("srai\t%4, #16", operands);
|
||||
output_asm_insn ("addi\t%0, #2", operands);
|
||||
}
|
||||
}
|
||||
|
||||
output_asm_insn ("stb\t%4, @%0", operands);
|
||||
|
||||
bytes = 0;
|
||||
}
|
||||
|
||||
first_time = 0;
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
/* Return true if op is an integer constant, less than or equal to
|
||||
MAX_MOVE_BYTES. */
|
||||
int
|
||||
m32r_block_immediate_operand (op, mode)
|
||||
rtx op;
|
||||
int mode;
|
||||
{
|
||||
if (GET_CODE (op) != CONST_INT
|
||||
|| INTVAL (op) > MAX_MOVE_BYTES
|
||||
|| INTVAL (op) <= 0)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -1987,6 +1987,7 @@ enum m32r_function_type
|
||||
{ "cmp_int16_operand", { CONST_INT }}, \
|
||||
{ "call_address_operand", { SYMBOL_REF, LABEL_REF, CONST }}, \
|
||||
{ "small_insn_p", { INSN, CALL_INSN, JUMP_INSN }}, \
|
||||
{ "m32r_block_immediate_operand",{ CONST_INT }}, \
|
||||
{ "large_insn_p", { INSN, CALL_INSN, JUMP_INSN }},
|
||||
|
||||
/* Functions declared in m32r.c */
|
||||
@ -2078,3 +2079,7 @@ extern char *emit_cond_move PROTO((Rtx *, Rtx));
|
||||
|
||||
/* Needed by a peephole optimisation. */
|
||||
#define PRESERVE_DEATH_INFO_REGNO_P(regno) (regno < FIRST_PSEUDO_REGISTER)
|
||||
|
||||
extern char * m32r_output_block_move PROTO((Rtx, Rtx *));
|
||||
extern int m32r_block_immediate_operand PROTO((Rtx, int));
|
||||
extern void m32r_expand_block_move PROTO((Rtx *));
|
||||
|
@ -1759,3 +1759,39 @@
|
||||
(set_attr "length" "4")
|
||||
]
|
||||
)
|
||||
|
||||
;; Block moves, see m32r.c for more details.
|
||||
;; Argument 0 is the destination
|
||||
;; Argument 1 is the source
|
||||
;; Argument 2 is the length
|
||||
;; Argument 3 is the alignment
|
||||
|
||||
(define_expand "movstrsi"
|
||||
[(parallel [(set (match_operand:BLK 0 "general_operand" "")
|
||||
(match_operand:BLK 1 "general_operand" ""))
|
||||
(use (match_operand:SI 2 "immediate_operand" ""))
|
||||
(use (match_operand:SI 3 "immediate_operand" ""))])]
|
||||
""
|
||||
"
|
||||
{
|
||||
if (operands[0]) /* avoid unused code messages */
|
||||
{
|
||||
m32r_expand_block_move (operands);
|
||||
DONE;
|
||||
}
|
||||
}")
|
||||
|
||||
;; Insn generated by block moves
|
||||
|
||||
(define_insn "movstrsi_internal"
|
||||
[(set (mem:BLK (match_operand:SI 0 "register_operand" "r")) ;; destination
|
||||
(mem:BLK (match_operand:SI 1 "register_operand" "r"))) ;; source
|
||||
(use (match_operand:SI 2 "m32r_block_immediate_operand" "J"));; # bytes to move
|
||||
(clobber (match_scratch:SI 3 "=&r")) ;; temp 1
|
||||
(clobber (match_scratch:SI 4 "=&r")) ;; temp 2
|
||||
(clobber (match_dup 0))
|
||||
(clobber (match_dup 1))]
|
||||
""
|
||||
"* return m32r_output_block_move (insn, operands);"
|
||||
[(set_attr "type" "store8")
|
||||
(set_attr "length" "72")]) ;; Maximum
|
||||
|
Loading…
Reference in New Issue
Block a user