mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-02-23 10:09:39 +08:00
re PR target/52908 (xop-mul-1:f9 miscompiled on bulldozer (-mxop))
PR target/52908 * config/i386/sse.md (vec_widen_smult_hi_v4si): Expand using xop_pmacsdqh insn pattern instead of xop_mulv2div2di3_high. (vec_widen_smult_lo_v4si): Expand using xop_pmacsdql insn pattern instead of xop_mulv2div2di3_low. (xop_p<macs>dql): Fix vec_select selector. (xop_p<macs>dqh): Ditto. (xop_mulv2div2di3_low): Remove insn_and_split pattern. (xop_mulv2div2di3_high): Ditto. testsuite/ChangeLog: PR target/52908 * gcc.target/i386/xop-imul32widen-vector.c: Update scan-assembler directive to scan for vpmuldq, not vpmacsdql. From-SVN: r187354
This commit is contained in:
parent
c54e713661
commit
d21a7b447a
@ -1,3 +1,15 @@
|
||||
2012-05-09 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/52908
|
||||
* config/i386/sse.md (vec_widen_smult_hi_v4si): Expand using
|
||||
xop_pmacsdqh insn pattern instead of xop_mulv2div2di3_high.
|
||||
(vec_widen_smult_lo_v4si): Expand using xop_pmacsdql insn pattern
|
||||
instead of xop_mulv2div2di3_low.
|
||||
(xop_p<macs>dql): Fix vec_select selector.
|
||||
(xop_p<macs>dqh): Ditto.
|
||||
(xop_mulv2div2di3_low): Remove insn_and_split pattern.
|
||||
(xop_mulv2div2di3_high): Ditto.
|
||||
|
||||
2012-05-09 Manuel López-Ibáñez <manu@gcc.gnu.org>
|
||||
|
||||
* doc/extend.texi (Function Attributes): Point xref to section
|
||||
|
@ -5748,11 +5748,15 @@
|
||||
|
||||
if (TARGET_XOP)
|
||||
{
|
||||
rtx t3 = gen_reg_rtx (V2DImode);
|
||||
|
||||
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
|
||||
GEN_INT (1), GEN_INT (3)));
|
||||
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
|
||||
GEN_INT (1), GEN_INT (3)));
|
||||
emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
|
||||
emit_move_insn (t3, CONST0_RTX (V2DImode));
|
||||
|
||||
emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
|
||||
DONE;
|
||||
}
|
||||
|
||||
@ -5777,11 +5781,15 @@
|
||||
|
||||
if (TARGET_XOP)
|
||||
{
|
||||
rtx t3 = gen_reg_rtx (V2DImode);
|
||||
|
||||
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
|
||||
GEN_INT (1), GEN_INT (3)));
|
||||
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
|
||||
GEN_INT (1), GEN_INT (3)));
|
||||
emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
|
||||
emit_move_insn (t3, CONST0_RTX (V2DImode));
|
||||
|
||||
emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
|
||||
DONE;
|
||||
}
|
||||
|
||||
@ -9792,11 +9800,11 @@
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
|
||||
(parallel [(const_int 1) (const_int 3)])))
|
||||
(parallel [(const_int 0) (const_int 2)])))
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
|
||||
(parallel [(const_int 1) (const_int 3)]))))
|
||||
(parallel [(const_int 0) (const_int 2)]))))
|
||||
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
|
||||
"TARGET_XOP"
|
||||
"vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
|
||||
@ -9810,93 +9818,17 @@
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
|
||||
(parallel [(const_int 0) (const_int 2)])))
|
||||
(parallel [(const_int 1) (const_int 3)])))
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
|
||||
(parallel [(const_int 0) (const_int 2)]))))
|
||||
(parallel [(const_int 1) (const_int 3)]))))
|
||||
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
|
||||
"TARGET_XOP"
|
||||
"vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
|
||||
[(set_attr "type" "ssemuladd")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
|
||||
;; fake it with a multiply/add. In general, we expect the define_split to
|
||||
;; occur before register allocation, so we have to handle the corner case where
|
||||
;; the target is the same as operands 1/2
|
||||
(define_insn_and_split "xop_mulv2div2di3_low"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=&x")
|
||||
(mult:V2DI
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_operand:V4SI 1 "register_operand" "%x")
|
||||
(parallel [(const_int 1) (const_int 3)])))
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
|
||||
(parallel [(const_int 1) (const_int 3)])))))]
|
||||
"TARGET_XOP"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(match_dup 3))
|
||||
(set (match_dup 0)
|
||||
(plus:V2DI
|
||||
(mult:V2DI
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_dup 1)
|
||||
(parallel [(const_int 1) (const_int 3)])))
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_dup 2)
|
||||
(parallel [(const_int 1) (const_int 3)]))))
|
||||
(match_dup 0)))]
|
||||
{
|
||||
operands[3] = CONST0_RTX (V2DImode);
|
||||
}
|
||||
[(set_attr "type" "ssemul")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
|
||||
;; fake it with a multiply/add. In general, we expect the define_split to
|
||||
;; occur before register allocation, so we have to handle the corner case where
|
||||
;; the target is the same as either operands[1] or operands[2]
|
||||
(define_insn_and_split "xop_mulv2div2di3_high"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=&x")
|
||||
(mult:V2DI
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_operand:V4SI 1 "register_operand" "%x")
|
||||
(parallel [(const_int 0) (const_int 2)])))
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
|
||||
(parallel [(const_int 0) (const_int 2)])))))]
|
||||
"TARGET_XOP"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(match_dup 3))
|
||||
(set (match_dup 0)
|
||||
(plus:V2DI
|
||||
(mult:V2DI
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_dup 1)
|
||||
(parallel [(const_int 0) (const_int 2)])))
|
||||
(sign_extend:V2DI
|
||||
(vec_select:V2SI
|
||||
(match_dup 2)
|
||||
(parallel [(const_int 0) (const_int 2)]))))
|
||||
(match_dup 0)))]
|
||||
{
|
||||
operands[3] = CONST0_RTX (V2DImode);
|
||||
}
|
||||
[(set_attr "type" "ssemul")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; XOP parallel integer multiply/add instructions for the intrinsics
|
||||
(define_insn "xop_p<macs>wd"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=x")
|
||||
|
@ -1,3 +1,9 @@
|
||||
2012-05-09 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/52908
|
||||
* gcc.target/i386/xop-imul32widen-vector.c: Update scan-assembler
|
||||
directive to scan for vpmuldq, not vpmacsdql.
|
||||
|
||||
2012-05-09 Michael Matz <matz@suse.de>
|
||||
|
||||
PR tree-optimization/53185
|
||||
|
@ -32,5 +32,5 @@ int main ()
|
||||
exit (0);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "vpmacsdql" } } */
|
||||
/* { dg-final { scan-assembler "vpmuldq" } } */
|
||||
/* { dg-final { scan-assembler "vpmacsdqh" } } */
|
||||
|
Loading…
Reference in New Issue
Block a user