re PR target/52908 (xop-mul-1:f9 miscompiled on bulldozer (-mxop))

PR target/52908
	* config/i386/sse.md (vec_widen_smult_hi_v4si): Expand using
	xop_pmacsdqh insn pattern instead of xop_mulv2div2di3_high.
	(vec_widen_smult_lo_v4si): Expand using xop_pmacsdql insn pattern
	instead of xop_mulv2div2di3_low.
	(xop_p<macs>dql): Fix vec_select selector.
	(xop_p<macs>dqh): Ditto.
	(xop_mulv2div2di3_low): Remove insn_and_split pattern.
	(xop_mulv2div2di3_high): Ditto.

testsuite/ChangeLog:

	PR target/52908
	* gcc.target/i386/xop-imul32widen-vector.c: Update scan-assembler
	directive to scan for vpmuldq, not vpmacsdql.

From-SVN: r187354
This commit is contained in:
Uros Bizjak 2012-05-09 22:41:08 +02:00 committed by Uros Bizjak
parent c54e713661
commit d21a7b447a
4 changed files with 33 additions and 83 deletions

View File

@ -1,3 +1,15 @@
2012-05-09 Uros Bizjak <ubizjak@gmail.com>
PR target/52908
* config/i386/sse.md (vec_widen_smult_hi_v4si): Expand using
xop_pmacsdqh insn pattern instead of xop_mulv2div2di3_high.
(vec_widen_smult_lo_v4si): Expand using xop_pmacsdql insn pattern
instead of xop_mulv2div2di3_low.
(xop_p<macs>dql): Fix vec_select selector.
(xop_p<macs>dqh): Ditto.
(xop_mulv2div2di3_low): Remove insn_and_split pattern.
(xop_mulv2div2di3_high): Ditto.
2012-05-09 Manuel López-Ibáñez <manu@gcc.gnu.org>
* doc/extend.texi (Function Attributes): Point xref to section

View File

@ -5748,11 +5748,15 @@
if (TARGET_XOP)
{
rtx t3 = gen_reg_rtx (V2DImode);
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
emit_move_insn (t3, CONST0_RTX (V2DImode));
emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
DONE;
}
@ -5777,11 +5781,15 @@
if (TARGET_XOP)
{
rtx t3 = gen_reg_rtx (V2DImode);
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
emit_move_insn (t3, CONST0_RTX (V2DImode));
emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
DONE;
}
@ -9792,11 +9800,11 @@
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
(parallel [(const_int 1) (const_int 3)])))
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 1) (const_int 3)]))))
(parallel [(const_int 0) (const_int 2)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
@ -9810,93 +9818,17 @@
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
(parallel [(const_int 0) (const_int 2)])))
(parallel [(const_int 1) (const_int 3)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0) (const_int 2)]))))
(parallel [(const_int 1) (const_int 3)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "TI")])
;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
;; fake it with a multiply/add whose addend is zero.
;;
;; Operand 0 (V2DI register) receives the product of the sign-extended
;; elements 1 and 3 of the V4SI operands 1 and 2.  The insn is never output
;; as is (its template is "#"); it has to be split into two sets: one that
;; loads zero into operand 0 and one that adds the widened product to
;; operand 0.
;;
;; Because the first set of the split sequence writes operand 0 before the
;; second one reads operands 1 and 2, operand 0 is marked earlyclobber
;; ("=&x") to handle the corner case where the target would otherwise be
;; the same as operand 1 or operand 2.
;;
;; NOTE(review): the split condition is "&& reload_completed", which reads
;; as splitting after reload rather than before register allocation --
;; confirm the intended timing.
;; NOTE(review): this "_low" pattern selects elements 1 and 3; the ChangeLog
;; entry for PR target/52908 changes the selector of xop_p<macs>dql --
;; confirm which element pair "low" is meant to denote.
(define_insn_and_split "xop_mulv2div2di3_low"
[(set (match_operand:V2DI 0 "register_operand" "=&x")
(mult:V2DI
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "register_operand" "%x")
(parallel [(const_int 1) (const_int 3)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 1) (const_int 3)])))))]
"TARGET_XOP"
"#"
"&& reload_completed"
;; Replacement sequence: first zero operand 0, then accumulate the widened
;; product into it.
[(set (match_dup 0)
(match_dup 3))
(set (match_dup 0)
(plus:V2DI
(mult:V2DI
(sign_extend:V2DI
(vec_select:V2SI
(match_dup 1)
(parallel [(const_int 1) (const_int 3)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_dup 2)
(parallel [(const_int 1) (const_int 3)]))))
(match_dup 0)))]
{
/* Operand 3 does not appear in the insn pattern; it is created here as
   the V2DI zero constant that initializes the accumulator.  */
operands[3] = CONST0_RTX (V2DImode);
}
[(set_attr "type" "ssemul")
(set_attr "mode" "TI")])
;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
;; fake it with a multiply/add whose addend is zero.
;;
;; Operand 0 (V2DI register) receives the product of the sign-extended
;; elements 0 and 2 of the V4SI operands 1 and 2.  The insn is never output
;; as is (its template is "#"); it has to be split into two sets: one that
;; loads zero into operand 0 and one that adds the widened product to
;; operand 0.
;;
;; Because the first set of the split sequence writes operand 0 before the
;; second one reads operands 1 and 2, operand 0 is marked earlyclobber
;; ("=&x") to handle the corner case where the target would otherwise be
;; the same as either operands[1] or operands[2].
;;
;; NOTE(review): the split condition is "&& reload_completed", which reads
;; as splitting after reload rather than before register allocation --
;; confirm the intended timing.
;; NOTE(review): this "_high" pattern selects elements 0 and 2; the ChangeLog
;; entry for PR target/52908 changes the selector of xop_p<macs>dqh --
;; confirm which element pair "high" is meant to denote.
(define_insn_and_split "xop_mulv2div2di3_high"
[(set (match_operand:V2DI 0 "register_operand" "=&x")
(mult:V2DI
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "register_operand" "%x")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0) (const_int 2)])))))]
"TARGET_XOP"
"#"
"&& reload_completed"
;; Replacement sequence: first zero operand 0, then accumulate the widened
;; product into it.
[(set (match_dup 0)
(match_dup 3))
(set (match_dup 0)
(plus:V2DI
(mult:V2DI
(sign_extend:V2DI
(vec_select:V2SI
(match_dup 1)
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_dup 2)
(parallel [(const_int 0) (const_int 2)]))))
(match_dup 0)))]
{
/* Operand 3 does not appear in the insn pattern; it is created here as
   the V2DI zero constant that initializes the accumulator.  */
operands[3] = CONST0_RTX (V2DImode);
}
[(set_attr "type" "ssemul")
(set_attr "mode" "TI")])
;; XOP parallel integer multiply/add instructions for the intrinsics
(define_insn "xop_p<macs>wd"
[(set (match_operand:V4SI 0 "register_operand" "=x")

View File

@ -1,3 +1,9 @@
2012-05-09 Uros Bizjak <ubizjak@gmail.com>
PR target/52908
* gcc.target/i386/xop-imul32widen-vector.c: Update scan-assembler
directive to scan for vpmuldq, not vpmacsdql.
2012-05-09 Michael Matz <matz@suse.de>
PR tree-optimization/53185

View File

@ -32,5 +32,5 @@ int main ()
exit (0);
}
/* { dg-final { scan-assembler "vpmacsdql" } } */
/* { dg-final { scan-assembler "vpmuldq" } } */
/* { dg-final { scan-assembler "vpmacsdqh" } } */