mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-01 12:06:16 +08:00
i386: Optimize abs expansion [PR97873]
The patch introduces absM named pattern to generate optimal insn sequence for CMOVE_TARGET targets. Currently, the expansion goes through neg+max optabs, and the following code is generated: movl %edi, %eax negl %eax cmpl %edi, %eax cmovl %edi, %eax This sequence is unoptimal in two ways. a) The compare instruction is not needed, since NEG insn sets the sign flag based on the result. The CMOV can use sign flag to select between negated and original value: movl %edi, %eax negl %eax cmovs %edi, %eax b) On some targets, CMOV is undesirable due to its performance issues. In addition to TARGET_EXPAND_ABS bypass, the patch introduces STV conversion of abs RTX to use PABS SSE insn: vmovd %edi, %xmm0 vpabsd %xmm0, %xmm0 vmovd %xmm0, %eax The patch changes compare mode of NEG instruction to CCGOCmode, which is the same mode as the mode of SUB instruction. IOW, sign bit becomes usable. Also, the mode iterator of <maxmin:code><mode>3 pattern is changed to SWI48x instead of SWI248. The purpose of maxmin expander is to prepare max/min RTX for STV to eventually convert them to SSE PMAX/PMIN instructions, in order to *avoid* CMOV insns with general registers. 2020-11-20 Uroš Bizjak <ubizjak@gmail.com> gcc/ PR target/97873 * config/i386/i386.md (*neg<mode>2_2): Rename from "*neg<mode>2_cmpz". Use CCGOCmode instead of CCZmode. (*negsi2_zext): Rename from *negsi2_cmpz_zext. Use CCGOCmode instead of CCZmode. (*neg<mode>_ccc_1): New insn pattern. (*neg<dwi>2_doubleword): Use *neg<mode>_ccc_1. (abs<mode>2): Add FLAGS_REG clobber. Use TARGET_CMOVE insn predicate. (*abs<mode>2_1): New insn_and_split pattern. (*absdi2_doubleword): Ditto. (<maxmin:code><mode>3): Use SWI48x mode iterator. (*<maxmin:code><mode>3): Use SWI48 mode iterator. * config/i386/i386-features.c (general_scalar_chain::compute_convert_gain): Handle ABS code. (general_scalar_chain::convert_insn): Ditto. (general_scalar_to_vector_candidate_p): Ditto. gcc/testsuite/ PR target/97873 * gcc.target/i386/pr97873.c: New test. * gcc.target/i386/pr97873-1.c: New test.
This commit is contained in:
parent
a774a6a2fb
commit
fdace75840
@ -581,7 +581,8 @@ general_scalar_chain::compute_convert_gain ()
|
||||
else if (GET_CODE (src) == NEG
|
||||
|| GET_CODE (src) == NOT)
|
||||
igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1);
|
||||
else if (GET_CODE (src) == SMAX
|
||||
else if (GET_CODE (src) == ABS
|
||||
|| GET_CODE (src) == SMAX
|
||||
|| GET_CODE (src) == SMIN
|
||||
|| GET_CODE (src) == UMAX
|
||||
|| GET_CODE (src) == UMIN)
|
||||
@ -986,13 +987,6 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
|
||||
|
||||
switch (GET_CODE (src))
|
||||
{
|
||||
case ASHIFT:
|
||||
case ASHIFTRT:
|
||||
case LSHIFTRT:
|
||||
convert_op (&XEXP (src, 0), insn);
|
||||
PUT_MODE (src, vmode);
|
||||
break;
|
||||
|
||||
case PLUS:
|
||||
case MINUS:
|
||||
case IOR:
|
||||
@ -1002,8 +996,14 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
|
||||
case SMIN:
|
||||
case UMAX:
|
||||
case UMIN:
|
||||
convert_op (&XEXP (src, 0), insn);
|
||||
convert_op (&XEXP (src, 1), insn);
|
||||
/* FALLTHRU */
|
||||
|
||||
case ABS:
|
||||
case ASHIFT:
|
||||
case ASHIFTRT:
|
||||
case LSHIFTRT:
|
||||
convert_op (&XEXP (src, 0), insn);
|
||||
PUT_MODE (src, vmode);
|
||||
break;
|
||||
|
||||
@ -1414,6 +1414,12 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
|
||||
return false;
|
||||
break;
|
||||
|
||||
case ABS:
|
||||
if ((mode == DImode && !TARGET_AVX512VL)
|
||||
|| (mode == SImode && !TARGET_SSSE3))
|
||||
return false;
|
||||
break;
|
||||
|
||||
case NEG:
|
||||
case NOT:
|
||||
break;
|
||||
|
@ -10062,8 +10062,8 @@
|
||||
"#"
|
||||
"reload_completed"
|
||||
[(parallel
|
||||
[(set (reg:CCZ FLAGS_REG)
|
||||
(compare:CCZ (neg:DWIH (match_dup 1)) (const_int 0)))
|
||||
[(set (reg:CCC FLAGS_REG)
|
||||
(ne:CCC (match_dup 1) (const_int 0)))
|
||||
(set (match_dup 0) (neg:DWIH (match_dup 1)))])
|
||||
(parallel
|
||||
[(set (match_dup 2)
|
||||
@ -10077,7 +10077,7 @@
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")
|
||||
|
||||
(define_insn "*neg<mode>2_1"
|
||||
(define_insn "*neg<mode>_1"
|
||||
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
|
||||
(neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
@ -10086,7 +10086,7 @@
|
||||
[(set_attr "type" "negnot")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*negsi2_1_zext"
|
||||
(define_insn "*negsi_1_zext"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI
|
||||
(neg:SI (match_operand:SI 1 "register_operand" "0"))))
|
||||
@ -10096,36 +10096,46 @@
|
||||
[(set_attr "type" "negnot")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
;; The problem with neg is that it does not perform (compare x 0),
|
||||
;; it really performs (compare 0 x), which leaves us with the zero
|
||||
;; flag being the only useful item.
|
||||
|
||||
(define_insn "*neg<mode>2_cmpz"
|
||||
[(set (reg:CCZ FLAGS_REG)
|
||||
(compare:CCZ
|
||||
(define_insn "*neg<mode>_2"
|
||||
[(set (reg FLAGS_REG)
|
||||
(compare
|
||||
(neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
|
||||
(const_int 0)))
|
||||
(const_int 0)))
|
||||
(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
|
||||
(neg:SWI (match_dup 1)))]
|
||||
"ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
|
||||
"ix86_match_ccmode (insn, CCGOCmode)
|
||||
&& ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
|
||||
"neg{<imodesuffix>}\t%0"
|
||||
[(set_attr "type" "negnot")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*negsi2_cmpz_zext"
|
||||
[(set (reg:CCZ FLAGS_REG)
|
||||
(compare:CCZ
|
||||
(define_insn "*negsi_2_zext"
|
||||
[(set (reg FLAGS_REG)
|
||||
(compare
|
||||
(neg:SI (match_operand:SI 1 "register_operand" "0"))
|
||||
(const_int 0)))
|
||||
(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI
|
||||
(neg:SI (match_dup 1))))]
|
||||
"TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
|
||||
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
|
||||
&& ix86_unary_operator_ok (NEG, SImode, operands)"
|
||||
"neg{l}\t%k0"
|
||||
[(set_attr "type" "negnot")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*neg<mode>_ccc"
|
||||
(define_insn "*neg<mode>_ccc_1"
|
||||
[(set (reg:CCC FLAGS_REG)
|
||||
(ne:CCC
|
||||
(match_operand:SWI 1 "nonimmediate_operand" "0")
|
||||
(const_int 0)))
|
||||
(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
|
||||
(neg:SWI (match_dup 1)))]
|
||||
""
|
||||
"neg{<imodesuffix>}\t%0"
|
||||
[(set_attr "type" "negnot")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*neg<mode>_ccc_2"
|
||||
[(set (reg:CCC FLAGS_REG)
|
||||
(ne:CCC
|
||||
(match_operand:SWI 1 "nonimmediate_operand" "0")
|
||||
@ -10169,27 +10179,102 @@
|
||||
;; Special expand pattern to handle integer mode abs
|
||||
|
||||
(define_expand "abs<mode>2"
|
||||
[(set (match_operand:SWI48x 0 "register_operand")
|
||||
(abs:SWI48x
|
||||
(match_operand:SWI48x 1 "register_operand")))]
|
||||
"TARGET_EXPAND_ABS"
|
||||
{
|
||||
machine_mode mode = <MODE>mode;
|
||||
[(parallel
|
||||
[(set (match_operand:SWI48x 0 "register_operand")
|
||||
(abs:SWI48x
|
||||
(match_operand:SWI48x 1 "general_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"TARGET_CMOVE"
|
||||
{
|
||||
if (TARGET_EXPAND_ABS)
|
||||
{
|
||||
machine_mode mode = <MODE>mode;
|
||||
operands[1] = force_reg (mode, operands[1]);
|
||||
|
||||
/* Generate rtx abs using abs (x) = (((signed) x >> (W-1)) ^ x) -
|
||||
((signed) x >> (W-1)) */
|
||||
rtx shift_amount = gen_int_mode (GET_MODE_PRECISION (mode) - 1, QImode);
|
||||
rtx shift_dst = expand_simple_binop (mode, ASHIFTRT, operands[1],
|
||||
shift_amount, NULL_RTX,
|
||||
0, OPTAB_DIRECT);
|
||||
rtx xor_dst = expand_simple_binop (mode, XOR, shift_dst, operands[1],
|
||||
operands[0], 0, OPTAB_DIRECT);
|
||||
rtx minus_dst = expand_simple_binop (mode, MINUS, xor_dst, shift_dst,
|
||||
operands[0], 0, OPTAB_DIRECT);
|
||||
if (!rtx_equal_p (minus_dst, operands[0]))
|
||||
emit_move_insn (operands[0], minus_dst);
|
||||
DONE;
|
||||
})
|
||||
/* Generate rtx abs using:
|
||||
abs (x) = (((signed) x >> (W-1)) ^ x) - ((signed) x >> (W-1)) */
|
||||
|
||||
rtx shift_amount = gen_int_mode (GET_MODE_PRECISION (mode) - 1, QImode);
|
||||
rtx shift_dst = expand_simple_binop (mode, ASHIFTRT, operands[1],
|
||||
shift_amount, NULL_RTX,
|
||||
0, OPTAB_DIRECT);
|
||||
rtx xor_dst = expand_simple_binop (mode, XOR, shift_dst, operands[1],
|
||||
operands[0], 0, OPTAB_DIRECT);
|
||||
rtx minus_dst = expand_simple_binop (mode, MINUS, xor_dst, shift_dst,
|
||||
operands[0], 0, OPTAB_DIRECT);
|
||||
if (!rtx_equal_p (minus_dst, operands[0]))
|
||||
emit_move_insn (operands[0], minus_dst);
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
(define_insn_and_split "*abs<mode>2_1"
|
||||
[(set (match_operand:SWI48 0 "register_operand")
|
||||
(abs:SWI48
|
||||
(match_operand:SWI48 1 "general_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_CMOVE && ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(parallel
|
||||
[(set (reg:CCGOC FLAGS_REG)
|
||||
(compare:CCGOC
|
||||
(neg:SWI48 (match_dup 1))
|
||||
(const_int 0)))
|
||||
(set (match_dup 2)
|
||||
(neg:SWI48 (match_dup 1)))])
|
||||
(set (match_dup 0)
|
||||
(if_then_else:SWI48
|
||||
(ge (reg:CCGOC FLAGS_REG) (const_int 0))
|
||||
(match_dup 2)
|
||||
(match_dup 1)))]
|
||||
{
|
||||
operands[1] = force_reg (<MODE>mode, operands[1]);
|
||||
operands[2] = gen_reg_rtx (<MODE>mode);
|
||||
})
|
||||
|
||||
(define_insn_and_split "*absdi2_doubleword"
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(abs:DI
|
||||
(match_operand:DI 1 "general_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"!TARGET_64BIT && TARGET_CMOVE
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(parallel
|
||||
[(set (reg:CCC FLAGS_REG)
|
||||
(ne:CCC (match_dup 1) (const_int 0)))
|
||||
(set (match_dup 2) (neg:DWIH (match_dup 1)))])
|
||||
(parallel
|
||||
[(set (match_dup 5)
|
||||
(plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
|
||||
(match_dup 4))
|
||||
(const_int 0)))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(parallel
|
||||
[(set (reg:CCGOC FLAGS_REG)
|
||||
(compare:CCGOC
|
||||
(neg:SI (match_dup 5))
|
||||
(const_int 0)))
|
||||
(set (match_dup 5)
|
||||
(neg:SI (match_dup 5)))])
|
||||
(set (match_dup 0)
|
||||
(if_then_else:SI
|
||||
(ge (reg:CCGOC FLAGS_REG) (const_int 0))
|
||||
(match_dup 2)
|
||||
(match_dup 1)))
|
||||
(set (match_dup 3)
|
||||
(if_then_else:SI
|
||||
(ge (reg:CCGOC FLAGS_REG) (const_int 0))
|
||||
(match_dup 5)
|
||||
(match_dup 4)))]
|
||||
{
|
||||
operands[1] = force_reg (DImode, operands[1]);
|
||||
operands[2] = gen_reg_rtx (DImode);
|
||||
|
||||
split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);
|
||||
})
|
||||
|
||||
(define_expand "<code>tf2"
|
||||
[(set (match_operand:TF 0 "register_operand")
|
||||
@ -18881,33 +18966,32 @@
|
||||
|
||||
(define_expand "<code><mode>3"
|
||||
[(parallel
|
||||
[(set (match_operand:SWI248 0 "register_operand")
|
||||
(maxmin:SWI248
|
||||
(match_operand:SWI248 1 "register_operand")
|
||||
(match_operand:SWI248 2 "general_operand")))
|
||||
[(set (match_operand:SWI48x 0 "register_operand")
|
||||
(maxmin:SWI48x
|
||||
(match_operand:SWI48x 1 "register_operand")
|
||||
(match_operand:SWI48x 2 "general_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"TARGET_CMOVE")
|
||||
|
||||
(define_insn_and_split "*<code><mode>3_1"
|
||||
[(set (match_operand:SWI248 0 "register_operand")
|
||||
(maxmin:SWI248
|
||||
(match_operand:SWI248 1 "register_operand")
|
||||
(match_operand:SWI248 2 "general_operand")))
|
||||
[(set (match_operand:SWI48 0 "register_operand")
|
||||
(maxmin:SWI48
|
||||
(match_operand:SWI48 1 "register_operand")
|
||||
(match_operand:SWI48 2 "general_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_CMOVE
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(set (match_dup 0)
|
||||
(if_then_else:SWI248 (match_dup 3)
|
||||
(if_then_else:SWI48 (match_dup 3)
|
||||
(match_dup 1)
|
||||
(match_dup 2)))]
|
||||
{
|
||||
machine_mode mode = <MODE>mode;
|
||||
rtx cmp_op = operands[2];
|
||||
|
||||
if (!register_operand (cmp_op, mode))
|
||||
operands[2] = force_reg (mode, cmp_op);
|
||||
operands[2] = force_reg (mode, cmp_op);
|
||||
|
||||
enum rtx_code code = <maxmin_rel>;
|
||||
|
||||
@ -18961,8 +19045,7 @@
|
||||
(match_dup 4)
|
||||
(match_dup 5)))]
|
||||
{
|
||||
if (!register_operand (operands[2], DImode))
|
||||
operands[2] = force_reg (DImode, operands[2]);
|
||||
operands[2] = force_reg (DImode, operands[2]);
|
||||
|
||||
split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);
|
||||
|
||||
|
12
gcc/testsuite/gcc.target/i386/pr97873-1.c
Normal file
12
gcc/testsuite/gcc.target/i386/pr97873-1.c
Normal file
@ -0,0 +1,12 @@
|
||||
/* PR target/97873 */
|
||||
/* { dg-do compile { target ia32 } } */
|
||||
/* { dg-options "-O2 -mavx512vl -mstv -mno-stackrealign" } */
|
||||
/* { dg-final { scan-assembler "pabsq" } } */
|
||||
|
||||
extern long long z;
|
||||
|
||||
void
|
||||
foo (long long x)
|
||||
{
|
||||
z = (x < 0) ? -x : x;
|
||||
}
|
9
gcc/testsuite/gcc.target/i386/pr97873.c
Normal file
9
gcc/testsuite/gcc.target/i386/pr97873.c
Normal file
@ -0,0 +1,9 @@
|
||||
/* PR target/97873 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=generic" } */
|
||||
/* { dg-final { scan-assembler-not "test|cmp" } } */
|
||||
|
||||
int foo (int x)
|
||||
{
|
||||
return (x < 0) ? -x : x;
|
||||
}
|
Loading…
Reference in New Issue
Block a user