mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-02-21 13:49:45 +08:00
i386.c (ix86_expand_clrmem): Move gen_cld down to the places where it is actually needed.
* config/i386/i386.c (ix86_expand_clrmem): Move gen_cld down to the places where it is actually needed. Don't use repz; stosb for -Os with sufficiently small constant sizes. For sufficiently small repz; stos{l,q} repeat counts use a sequence of stos{l,q} instructions instead. From-SVN: r85635
This commit is contained in:
parent
6797f908ee
commit
6b32b6286b
@ -1,3 +1,11 @@
|
||||
2004-08-06 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* config/i386/i386.c (ix86_expand_clrmem): Move gen_cld down to
|
||||
the places where it is actually needed. Don't use repz; stosb
|
||||
for -Os with sufficiently small constant sizes.
|
||||
For sufficiently small repz; stos{l,q} repeat counts use a sequence
|
||||
of stos{l,q} instructions instead.
|
||||
|
||||
2004-08-06 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
|
||||
|
||||
PR tree-optimization/16807
|
||||
|
@ -11508,13 +11508,20 @@ ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
|
||||
if (destreg != XEXP (dst, 0))
|
||||
dst = replace_equiv_address_nv (dst, destreg);
|
||||
|
||||
emit_insn (gen_cld ());
|
||||
|
||||
/* When optimizing for size emit simple rep ; stosb instruction for
|
||||
counts not divisible by 4. */
|
||||
counts not divisible by 4. The movl $N, %ecx; rep; stosb
|
||||
sequence is 7 bytes long, so if optimizing for size and count is
|
||||
small enough that some stosl, stosw and stosb instructions without
|
||||
rep are shorter, fall back into the next if. */
|
||||
|
||||
if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
|
||||
if ((!optimize || optimize_size)
|
||||
&& (count == 0
|
||||
|| ((count & 0x03)
|
||||
&& (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
|
||||
{
|
||||
emit_insn (gen_cld ());
|
||||
|
||||
countreg = ix86_zero_extend_to_Pmode (count_exp);
|
||||
zeroreg = copy_to_mode_reg (QImode, const0_rtx);
|
||||
destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
|
||||
@ -11528,17 +11535,54 @@ ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
|
||||
int size = TARGET_64BIT && !optimize_size ? 8 : 4;
|
||||
unsigned HOST_WIDE_INT offset = 0;
|
||||
|
||||
emit_insn (gen_cld ());
|
||||
|
||||
zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
|
||||
if (count & ~(size - 1))
|
||||
{
|
||||
countreg = copy_to_mode_reg (counter_mode,
|
||||
GEN_INT ((count >> (size == 4 ? 2 : 3))
|
||||
& (TARGET_64BIT ? -1 : 0x3fffffff)));
|
||||
countreg = ix86_zero_extend_to_Pmode (countreg);
|
||||
destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
|
||||
destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
|
||||
emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
|
||||
offset = count & ~(size - 1);
|
||||
unsigned HOST_WIDE_INT repcount;
|
||||
unsigned int max_nonrep;
|
||||
|
||||
repcount = count >> (size == 4 ? 2 : 3);
|
||||
if (!TARGET_64BIT)
|
||||
repcount &= 0x3fffffff;
|
||||
|
||||
/* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
|
||||
movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
|
||||
bytes. In both cases the latter seems to be faster for small
|
||||
values of N. */
|
||||
max_nonrep = size == 4 ? 7 : 4;
|
||||
if (!optimize_size)
|
||||
switch (ix86_tune)
|
||||
{
|
||||
case PROCESSOR_PENTIUM4:
|
||||
case PROCESSOR_NOCONA:
|
||||
max_nonrep = 3;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (repcount <= max_nonrep)
|
||||
while (repcount-- > 0)
|
||||
{
|
||||
rtx mem = adjust_automodify_address_nv (dst,
|
||||
GET_MODE (zeroreg),
|
||||
destreg, offset);
|
||||
emit_insn (gen_strset (destreg, mem, zeroreg));
|
||||
offset += size;
|
||||
}
|
||||
else
|
||||
{
|
||||
countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
|
||||
countreg = ix86_zero_extend_to_Pmode (countreg);
|
||||
destexp = gen_rtx_ASHIFT (Pmode, countreg,
|
||||
GEN_INT (size == 4 ? 2 : 3));
|
||||
destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
|
||||
emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
|
||||
destexp));
|
||||
offset = count & ~(size - 1);
|
||||
}
|
||||
}
|
||||
if (size == 8 && (count & 0x04))
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user