sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm as flags setting insn.

* config/i386/sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm
	as flags setting insn.
	(sse4_2_pcmpistr_cconly): Prefer pcmpistrm as flags setting insn.

	* config/i386/i386.md (UNSPEC_ROUNDP, UNSPEC_ROUNDS): Remove.
	(UNSPEC_ROUND): New.
	("sse4_1_round<mode>2"): New insn pattern.
	("rint<mode>2"): Expand using "sse4_1_round<mode>2" pattern for
	SSE4.1 targets.
	("floor<mode>2"): Rename from floordf2 and floorsf2.  Macroize
	expander using SSEMODEF mode macro.  Expand using
	"sse4_1_round<mode>2" pattern for SSE4.1 targets.
	("ceil<mode>2"): Rename from ceildf2 and ceilsf2.  Macroize
	expander using SSEMODEF mode macro.  Expand using
	"sse4_1_round<mode>2" pattern for SSE4.1 targets.
	("btrunc<mode>2"): Rename from btruncdf2 and btruncsf2.  Macroize
	expander using SSEMODEF mode macro.  Expand using
	"sse4_1_round<mode>2" pattern for SSE4.1 targets.
	* config/i386/sse.md ("sse4_1_roundpd", "sse4_1_roundps"): Use
	UNSPEC_ROUND instead of UNSPEC_ROUNDP.
	("sse4_1_roundsd", "sse4_1_roundss"): Use UNSPEC_ROUND instead of
	UNSPEC_ROUNDS.

From-SVN: r125356
This commit is contained in:
Uros Bizjak 2007-06-06 08:53:29 +02:00
parent 69f2880c76
commit f28eb39cee
3 changed files with 124 additions and 142 deletions

View File

@ -1,3 +1,30 @@
2007-06-06 Uros Bizjak <ubizjak@gmail.com>
* config/i386/sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm
as flags setting insn.
(sse4_2_pcmpistr_cconly): Prefer pcmpistrm as flags setting insn.
2007-06-06 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (UNSPEC_ROUNDP, UNSPEC_ROUNDS): Remove.
(UNSPEC_ROUND): New.
("sse4_1_round<mode>2"): New insn pattern.
("rint<mode>2"): Expand using "sse4_1_round<mode>2" pattern for
SSE4.1 targets.
("floor<mode>2"): Rename from floordf2 and floorsf2. Macroize
expander using SSEMODEF mode macro. Expand using
"sse4_1_round<mode>2" pattern for SSE4.1 targets.
("ceil<mode>2"): Rename from ceildf2 and ceilsf2. Macroize
expander using SSEMODEF mode macro. Expand using
"sse4_1_round<mode>2" pattern for SSE4.1 targets.
("btrunc<mode>2"): Rename from btruncdf2 and btruncsf2. Macroize
expander using SSEMODEF mode macro. Expand using
"sse4_1_round<mode>2" pattern for SSE4.1 targets.
* config/i386/sse.md ("sse4_1_roundpd", "sse4_1_roundps"): Use
UNSPEC_ROUND instead of UNSPEC_ROUNDP.
("sse4_1_roundsd", "sse4_1_roundss"): Use UNSPEC_ROUND instead of
UNSPEC_ROUNDS.
2007-06-06 Jan Sjodin <jan.sjodin@amd.com>
Sebastian Pop <sebpop@gmail.com>
@ -53,7 +80,8 @@
* cfgexpand.c (label_rtx_for_bb): Likewise.
(expand_gimple_basic_block): Likewise.
* cfghooks.c (dump_bb): Likewise.
(lv_adjust_loop_header_phi): Avoid using C++ keywords as variable names.
(lv_adjust_loop_header_phi): Avoid using C++ keywords as
variable names.
(lv_add_condition_to_bb): Likewise.
* cfglayout.c (relink_block_chain): Cast according to the coding
conventions.
@ -64,7 +92,8 @@
(dump_recorded_exit): Likewise.
* cfgloop.h (enum loop_estimation): Move out of struct scope...
(struct loop): ... from here.
* cfgloopmanip.c (rpe_enum_p): Cast according to the coding conventions.
* cfgloopmanip.c (rpe_enum_p): Cast according to the coding
conventions.
* cfgrtl.c (rtl_create_basic_block): Likewise.
(rtl_split_block): Likewise.
(rtl_dump_bb): Likewise.

View File

@ -171,8 +171,7 @@
(UNSPEC_MPSADBW 138)
(UNSPEC_PHMINPOSUW 139)
(UNSPEC_PTEST 140)
(UNSPEC_ROUNDP 141)
(UNSPEC_ROUNDS 142)
(UNSPEC_ROUND 141)
; For SSE4.2 support
(UNSPEC_CRC32 143)
@ -16999,6 +16998,17 @@
})
;; Scalar rounding insn over the SSEMODEF mode macro, available only when
;; TARGET_SSE4_1 is set.  The output template emits "rounds<ssemodefsuffix>"
;; with operand 2 (the immediate), operand 1 (source register) and
;; operand 0 (destination register), in AT&T and Intel operand order
;; respectively.
;; Operand 2 must satisfy const_0_to_15_operand; the expanders that use
;; this pattern pass 0x04 (rint), 0x01 (floor), 0x02 (ceil) and
;; 0x03 (btrunc).
;; NOTE(review): presumably the immediate is the rounding-control field of
;; the hardware instruction -- confirm against the ISA reference.
(define_insn "sse4_1_round<mode>2"
[(set (match_operand:SSEMODEF 0 "register_operand" "=x")
(unspec:SSEMODEF [(match_operand:SSEMODEF 1 "register_operand" "x")
(match_operand:SI 2 "const_0_to_15_operand" "n")]
UNSPEC_ROUND))]
"TARGET_SSE4_1"
"rounds<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1")
(set_attr "mode" "<MODE>")])
(define_insn "rintxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
@ -17018,12 +17028,18 @@
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& !optimize_size)"
&& (TARGET_SSE4_1 || !optimize_size))"
{
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& !optimize_size)
ix86_expand_rint (operand0, operand1);
&& (TARGET_SSE4_1 || !optimize_size))
{
if (TARGET_SSE4_1)
emit_insn (gen_sse4_1_round<mode>2
(operands[0], operands[1], GEN_INT (0x04)));
else
ix86_expand_rint (operand0, operand1);
}
else
{
rtx op0 = gen_reg_rtx (XFmode);
@ -17044,7 +17060,7 @@
&& !flag_trapping_math && !flag_rounding_math
&& !optimize_size"
{
if ((<MODE>mode != DFmode) || TARGET_64BIT)
if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_round (operand0, operand1);
else
ix86_expand_rounddf_32 (operand0, operand1);
@ -17250,20 +17266,25 @@
DONE;
})
(define_expand "floordf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:DF 1 "register_operand" ""))]
"((TARGET_USE_FANCY_MATH_387
&& (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
&& !flag_trapping_math))
&& !optimize_size"
(define_expand "floor<mode>2"
[(use (match_operand:SSEMODEF 0 "register_operand" ""))
(use (match_operand:SSEMODEF 1 "register_operand" ""))]
"(TARGET_USE_FANCY_MATH_387
&& (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
|| TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations && !optimize_size)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& (TARGET_SSE4_1 || !optimize_size))"
{
if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
&& !flag_trapping_math)
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& (TARGET_SSE4_1 || !optimize_size))
{
if (TARGET_64BIT)
if (TARGET_SSE4_1)
emit_insn (gen_sse4_1_round<mode>2
(operands[0], operands[1], GEN_INT (0x01)));
else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_floorceil (operand0, operand1, true);
else
ix86_expand_floorceildf_32 (operand0, operand1, true);
@ -17273,36 +17294,10 @@
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
emit_insn (gen_extenddfxf2 (op1, operands[1]));
emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_floor (op0, op1));
emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
}
DONE;
})
(define_expand "floorsf2"
[(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:SF 1 "register_operand" ""))]
"((TARGET_USE_FANCY_MATH_387
&& (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
&& !flag_trapping_math))
&& !optimize_size"
{
if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
&& !flag_trapping_math)
ix86_expand_floorceil (operand0, operand1, true);
else
{
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
emit_insn (gen_extendsfxf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_floor (op0, op1));
emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
}
DONE;
})
@ -17536,20 +17531,25 @@
DONE;
})
(define_expand "ceildf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:DF 1 "register_operand" ""))]
"((TARGET_USE_FANCY_MATH_387
&& (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
&& !flag_trapping_math))
&& !optimize_size"
(define_expand "ceil<mode>2"
[(use (match_operand:SSEMODEF 0 "register_operand" ""))
(use (match_operand:SSEMODEF 1 "register_operand" ""))]
"(TARGET_USE_FANCY_MATH_387
&& (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
|| TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations && !optimize_size)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& (TARGET_SSE4_1 || !optimize_size))"
{
if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
&& !flag_trapping_math)
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& (TARGET_SSE4_1 || !optimize_size))
{
if (TARGET_64BIT)
if (TARGET_SSE4_1)
emit_insn (gen_sse4_1_round<mode>2
(operands[0], operands[1], GEN_INT (0x02)));
else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_floorceil (operand0, operand1, false);
else
ix86_expand_floorceildf_32 (operand0, operand1, false);
@ -17559,36 +17559,10 @@
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
emit_insn (gen_extenddfxf2 (op1, operands[1]));
emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_ceil (op0, op1));
emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
}
DONE;
})
(define_expand "ceilsf2"
[(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:SF 1 "register_operand" ""))]
"((TARGET_USE_FANCY_MATH_387
&& (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
&& !flag_trapping_math))
&& !optimize_size"
{
if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
&& !flag_trapping_math)
ix86_expand_floorceil (operand0, operand1, false);
else
{
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
emit_insn (gen_extendsfxf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_ceil (op0, op1));
emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
}
DONE;
})
@ -17820,20 +17794,25 @@
DONE;
})
(define_expand "btruncdf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:DF 1 "register_operand" ""))]
"((TARGET_USE_FANCY_MATH_387
&& (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
&& !flag_trapping_math))
&& !optimize_size"
(define_expand "btrunc<mode>2"
[(use (match_operand:SSEMODEF 0 "register_operand" ""))
(use (match_operand:SSEMODEF 1 "register_operand" ""))]
"(TARGET_USE_FANCY_MATH_387
&& (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
|| TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations && !optimize_size)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& (TARGET_SSE4_1 || !optimize_size))"
{
if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
&& !flag_trapping_math)
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
&& (TARGET_SSE4_1 || !optimize_size))
{
if (TARGET_64BIT)
if (TARGET_SSE4_1)
emit_insn (gen_sse4_1_round<mode>2
(operands[0], operands[1], GEN_INT (0x03)));
else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_trunc (operand0, operand1);
else
ix86_expand_truncdf_32 (operand0, operand1);
@ -17843,36 +17822,10 @@
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
emit_insn (gen_extenddfxf2 (op1, operands[1]));
emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_trunc (op0, op1));
emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
}
DONE;
})
(define_expand "btruncsf2"
[(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:SF 1 "register_operand" ""))]
"((TARGET_USE_FANCY_MATH_387
&& (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
&& !flag_trapping_math))
&& !optimize_size"
{
if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
&& !flag_trapping_math)
ix86_expand_trunc (operand0, operand1);
else
{
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
emit_insn (gen_extendsfxf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_trunc (op0, op1));
emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
}
DONE;
})

View File

@ -6338,7 +6338,7 @@
[(set (match_operand:V2DF 0 "register_operand" "=x")
(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
(match_operand:SI 2 "const_0_to_15_operand" "n")]
UNSPEC_ROUNDP))]
UNSPEC_ROUND))]
"TARGET_SSE4_1"
"roundpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssecvt")
@ -6349,7 +6349,7 @@
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
(match_operand:SI 2 "const_0_to_15_operand" "n")]
UNSPEC_ROUNDP))]
UNSPEC_ROUND))]
"TARGET_SSE4_1"
"roundps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssecvt")
@ -6361,7 +6361,7 @@
(vec_merge:V2DF
(unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
(match_operand:SI 3 "const_0_to_15_operand" "n")]
UNSPEC_ROUNDS)
UNSPEC_ROUND)
(match_operand:V2DF 1 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE4_1"
@ -6375,7 +6375,7 @@
(vec_merge:V4SF
(unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
(match_operand:SI 3 "const_0_to_15_operand" "n")]
UNSPEC_ROUNDS)
UNSPEC_ROUND)
(match_operand:V4SF 1 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE4_1"
@ -6504,14 +6504,14 @@
(match_operand:SI 3 "register_operand" "d,d,d,d")
(match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
UNSPEC_PCMPESTR))
(clobber (match_scratch:SI 5 "=c,c,X,X"))
(clobber (match_scratch:V16QI 6 "=X,X,Y0,Y0"))]
(clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
(clobber (match_scratch:SI 6 "= X, X,c,c"))]
"TARGET_SSE4_2"
"@
pcmpestri\t{%4, %2, %0|%0, %2, %4}
pcmpestri\t{%4, %2, %0|%0, %2, %4}
pcmpestrm\t{%4, %2, %0|%0, %2, %4}
pcmpestrm\t{%4, %2, %0|%0, %2, %4}"
pcmpestrm\t{%4, %2, %0|%0, %2, %4}
pcmpestri\t{%4, %2, %0|%0, %2, %4}
pcmpestri\t{%4, %2, %0|%0, %2, %4}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
@ -6613,14 +6613,14 @@
(match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
(match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
UNSPEC_PCMPISTR))
(clobber (match_scratch:SI 3 "=c,c,X,X"))
(clobber (match_scratch:V16QI 4 "=X,X,Y0,Y0"))]
(clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
(clobber (match_scratch:SI 4 "= X, X,c,c"))]
"TARGET_SSE4_2"
"@
pcmpistri\t{%2, %1, %0|%0, %1, %2}
pcmpistri\t{%2, %1, %0|%0, %1, %2}
pcmpistrm\t{%2, %1, %0|%0, %1, %2}
pcmpistrm\t{%2, %1, %0|%0, %1, %2}"
pcmpistrm\t{%2, %1, %0|%0, %1, %2}
pcmpistri\t{%2, %1, %0|%0, %1, %2}
pcmpistri\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")