mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-22 03:40:26 +08:00
Fix FMA4 and XOP insns.
2009-12-02 Sebastian Pop <sebastian.pop@amd.com>
           Richard Henderson <rth@redhat.com>

    * config/i386/i386-protos.h (ix86_fma4_valid_op_p): Removed.
    * config/i386/i386.c (ix86_fma4_valid_op_p): Removed.
    * config/i386/i386.md: Do not use ix86_fma4_valid_op_p.
    * config/i386/sse.md (fma4_*): Remove alternative with operand 1
    matching a memory access. Do not use ix86_fma4_valid_op_p.
    (xop_*): Same.
    Do not use ix86_fma4_valid_op_p in FMA4 and XOP splitters.

Co-Authored-By: Richard Henderson <rth@redhat.com>
From-SVN: r154970
This commit is contained in:
parent
aa356b75ed
commit
4926bb1d60
@ -1,3 +1,14 @@
|
||||
2009-12-02 Sebastian Pop <sebastian.pop@amd.com>
|
||||
Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/i386-protos.h (ix86_fma4_valid_op_p): Removed.
|
||||
* config/i386/i386.c (ix86_fma4_valid_op_p): Removed.
|
||||
* config/i386/i386.md: Do not use ix86_fma4_valid_op_p.
|
||||
* config/i386/sse.md (fma4_*): Remove alternative with operand 1
|
||||
matching a memory access. Do not use ix86_fma4_valid_op_p.
|
||||
(xop_*): Same.
|
||||
Do not use ix86_fma4_valid_op_p in FMA4 and XOP splitters.
|
||||
|
||||
2009-12-02 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/i386.c (ix86_fixup_binary_operands): For FMA4, force
|
||||
|
@ -218,8 +218,7 @@ extern void ix86_expand_vector_set (bool, rtx, rtx, int);
|
||||
extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
|
||||
extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
|
||||
|
||||
extern bool ix86_fma4_valid_op_p (rtx [], rtx, int, bool, int, bool);
|
||||
extern void ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode);
|
||||
extern bool ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode);
|
||||
|
||||
extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
|
||||
|
||||
|
@ -28807,197 +28807,35 @@ ix86_expand_round (rtx operand0, rtx operand1)
|
||||
emit_move_insn (operand0, res);
|
||||
}
|
||||
|
||||
/* Validate whether a FMA4 instruction is valid or not.
|
||||
OPERANDS is the array of operands.
|
||||
NUM is the number of operands.
|
||||
USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
|
||||
NUM_MEMORY is the maximum number of memory operands to accept.
|
||||
NUM_MEMORY less than zero is a special case to allow an operand
|
||||
of an instruction to be memory operation.
|
||||
when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
|
||||
|
||||
/* Fixup an FMA4 or XOP instruction that has 2 memory input references
|
||||
into a form the hardware will allow by using the destination
|
||||
register to load one of the memory operations. Presently this is
|
||||
used by the multiply/add routines to allow 2 memory references. */
|
||||
|
||||
bool
|
||||
ix86_fma4_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
|
||||
bool uses_oc0, int num_memory, bool commutative)
|
||||
{
|
||||
int mem_mask;
|
||||
int mem_count;
|
||||
int i;
|
||||
|
||||
/* Count the number of memory arguments */
|
||||
mem_mask = 0;
|
||||
mem_count = 0;
|
||||
for (i = 0; i < num; i++)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (operands[i]);
|
||||
if (register_operand (operands[i], mode))
|
||||
;
|
||||
|
||||
else if (memory_operand (operands[i], mode))
|
||||
{
|
||||
mem_mask |= (1 << i);
|
||||
mem_count++;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
rtx pattern = PATTERN (insn);
|
||||
|
||||
/* allow 0 for pcmov */
|
||||
if (GET_CODE (pattern) != SET
|
||||
|| GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
|
||||
|| i < 2
|
||||
|| operands[i] != CONST0_RTX (mode))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Special case pmacsdq{l,h} where we allow the 3rd argument to be
|
||||
a memory operation. */
|
||||
if (num_memory < 0)
|
||||
{
|
||||
num_memory = -num_memory;
|
||||
if ((mem_mask & (1 << (num-1))) != 0)
|
||||
{
|
||||
mem_mask &= ~(1 << (num-1));
|
||||
mem_count--;
|
||||
}
|
||||
}
|
||||
|
||||
/* If there were no memory operations, allow the insn */
|
||||
if (mem_mask == 0)
|
||||
return true;
|
||||
|
||||
/* Do not allow the destination register to be a memory operand. */
|
||||
else if (mem_mask & (1 << 0))
|
||||
return false;
|
||||
|
||||
/* If there are too many memory operations, disallow the instruction. While
|
||||
the hardware only allows 1 memory reference, before register allocation
|
||||
for some insns, we allow two memory operations sometimes in order to allow
|
||||
code like the following to be optimized:
|
||||
|
||||
float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
|
||||
|
||||
or similar cases that are vectorized into using the vfmaddss
|
||||
instruction. */
|
||||
else if (mem_count > num_memory)
|
||||
return false;
|
||||
|
||||
/* Don't allow more than one memory operation if not optimizing. */
|
||||
else if (mem_count > 1 && !optimize)
|
||||
return false;
|
||||
|
||||
else if (num == 4 && mem_count == 1)
|
||||
{
|
||||
/* formats (destination is the first argument), example vfmaddss:
|
||||
xmm1, xmm1, xmm2, xmm3/mem
|
||||
xmm1, xmm1, xmm2/mem, xmm3
|
||||
xmm1, xmm2, xmm3/mem, xmm1
|
||||
xmm1, xmm2/mem, xmm3, xmm1 */
|
||||
if (uses_oc0)
|
||||
return ((mem_mask == (1 << 1))
|
||||
|| (mem_mask == (1 << 2))
|
||||
|| (mem_mask == (1 << 3)));
|
||||
|
||||
/* format, example vpmacsdd:
|
||||
xmm1, xmm2, xmm3/mem, xmm1 */
|
||||
if (commutative)
|
||||
return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
|
||||
else
|
||||
return (mem_mask == (1 << 2));
|
||||
}
|
||||
|
||||
else if (num == 4 && num_memory == 2)
|
||||
{
|
||||
/* If there are two memory operations, we can load one of the memory ops
|
||||
into the destination register. This is for optimizing the
|
||||
multiply/add ops, which the combiner has optimized both the multiply
|
||||
and the add insns to have a memory operation. We have to be careful
|
||||
that the destination doesn't overlap with the inputs. */
|
||||
rtx op0 = operands[0];
|
||||
|
||||
if (reg_mentioned_p (op0, operands[1])
|
||||
|| reg_mentioned_p (op0, operands[2])
|
||||
|| reg_mentioned_p (op0, operands[3]))
|
||||
return false;
|
||||
|
||||
/* formats (destination is the first argument), example vfmaddss:
|
||||
xmm1, xmm1, xmm2, xmm3/mem
|
||||
xmm1, xmm1, xmm2/mem, xmm3
|
||||
xmm1, xmm2, xmm3/mem, xmm1
|
||||
xmm1, xmm2/mem, xmm3, xmm1
|
||||
|
||||
For the oc0 case, we will load either operands[1] or operands[3] into
|
||||
operands[0], so any combination of 2 memory operands is ok. */
|
||||
if (uses_oc0)
|
||||
return true;
|
||||
|
||||
/* format, example vpmacsdd:
|
||||
xmm1, xmm2, xmm3/mem, xmm1
|
||||
|
||||
For the integer multiply/add instructions be more restrictive and
|
||||
require operands[2] and operands[3] to be the memory operands. */
|
||||
if (commutative)
|
||||
return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
|
||||
else
|
||||
return (mem_mask == ((1 << 2) | (1 << 3)));
|
||||
}
|
||||
|
||||
else if (num == 3 && num_memory == 1)
|
||||
{
|
||||
/* formats, example vprotb:
|
||||
xmm1, xmm2, xmm3/mem
|
||||
xmm1, xmm2/mem, xmm3 */
|
||||
if (uses_oc0)
|
||||
return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
|
||||
|
||||
/* format, example vpcomeq:
|
||||
xmm1, xmm2, xmm3/mem */
|
||||
else
|
||||
return (mem_mask == (1 << 2));
|
||||
}
|
||||
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/* Fixup an FMA4 instruction that has 2 memory input references into a form the
|
||||
hardware will allow by using the destination register to load one of the
|
||||
memory operations. Presently this is used by the multiply/add routines to
|
||||
allow 2 memory references. */
|
||||
|
||||
void
|
||||
ix86_expand_fma4_multiple_memory (rtx operands[],
|
||||
enum machine_mode mode)
|
||||
{
|
||||
rtx op0 = operands[0];
|
||||
rtx scratch = operands[0];
|
||||
|
||||
if (memory_operand (op0, mode)
|
||||
|| reg_mentioned_p (op0, operands[1])
|
||||
|| reg_mentioned_p (op0, operands[2])
|
||||
|| reg_mentioned_p (op0, operands[3]))
|
||||
gcc_unreachable ();
|
||||
gcc_assert (register_operand (operands[0], mode));
|
||||
gcc_assert (register_operand (operands[1], mode));
|
||||
gcc_assert (MEM_P (operands[2]) && MEM_P (operands[3]));
|
||||
|
||||
/* For 2 memory operands, pick either operands[1] or operands[3] to move into
|
||||
the destination register. */
|
||||
if (memory_operand (operands[1], mode))
|
||||
if (reg_mentioned_p (scratch, operands[1]))
|
||||
{
|
||||
emit_move_insn (op0, operands[1]);
|
||||
operands[1] = op0;
|
||||
}
|
||||
else if (memory_operand (operands[3], mode))
|
||||
{
|
||||
emit_move_insn (op0, operands[3]);
|
||||
operands[3] = op0;
|
||||
if (!can_create_pseudo_p ())
|
||||
return false;
|
||||
scratch = gen_reg_rtx (mode);
|
||||
}
|
||||
|
||||
emit_move_insn (scratch, operands[3]);
|
||||
if (rtx_equal_p (operands[2], operands[3]))
|
||||
operands[2] = operands[3] = scratch;
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
return;
|
||||
operands[3] = scratch;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Table of valid machine attributes. */
|
||||
|
@ -19248,7 +19248,7 @@
|
||||
(match_operand:MODEF 1 "register_operand" "x")
|
||||
(match_operand:MODEF 2 "register_operand" "x")
|
||||
(match_operand:MODEF 3 "register_operand" "x")))]
|
||||
"TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
|
||||
"TARGET_XOP"
|
||||
"vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
|
||||
[(set_attr "type" "sse4arg")])
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user