2
0
mirror of git://gcc.gnu.org/git/gcc.git synced 2025-04-18 01:40:35 +08:00

Optimize double word negation of zero extended values on x86.

It's not uncommon for GCC to convert between a (zero or one) Boolean
value and a (zero or all ones) mask value, possibly of a wider type,
using negation.

Currently on x86_64, the following simple test case:
__int128 foo(unsigned long x) { return -(__int128)x; }

compiles with -O2 to:

        movq    %rdi, %rax
        xorl    %edx, %edx
        negq    %rax
        adcq    $0, %rdx
        negq    %rdx
        ret

with this patch, which adds an additional peephole2 to i386.md,
we instead generate the improved:

        movq    %rdi, %rax
        negq    %rax
        sbbq    %rdx, %rdx
        ret

[and likewise for the (DImode) long long version using -m32.]
A peephole2 is appropriate as the double word negation and the
operation providing the xor are typically only split after combine.

In fact, the new peephole2 sequence:
;; Convert:
;;   xorl %edx, %edx
;;   negl %eax
;;   adcl $0, %edx
;;   negl %edx
;; to:
;;   negl %eax
;;   sbbl %edx, %edx    // *x86_mov<mode>cc_0_m1

is nearly identical to (and placed immediately after) the existing:
;; Convert:
;;   mov %esi, %edx
;;   negl %eax
;;   adcl $0, %edx
;;   negl %edx
;; to:
;;   xorl %edx, %edx
;;   negl %eax
;;   sbbl %esi, %edx

One potential objection/concern is that "sbb? %reg,%reg" may possibly be
incorrectly perceived as a false register dependency on older hardware,
much like "xor? %reg,%reg" may be perceived as a false dependency on
really old hardware.  This doesn't currently appear to be a concern
for the i386 backend's *x86_move<mode>cc_0_m1 as shown by the following
test code:

int bar(unsigned int x, unsigned int y) {
  return x > y ? -1 : 0;
}

which currently generates a "naked" sbb:
        cmp     esi, edi
        sbb     eax, eax
        ret

If anyone does potentially encounter a stall, it would easy to add
a splitter or peephole2 controlled by a tuning flag to insert an additional
xor to break the false dependency chain (when not optimizing for size),
but I don't believe this is required on recent microarchitectures.

2022-05-24 Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* config/i386/i386.md (peephole2): Convert xor;neg;adc;neg,
	i.e. a double word negation of a zero extended operand, to
	neg;sbb.

gcc/testsuite/ChangeLog
	* gcc.target/i386/neg-zext-1.c: New test case for -m32.
	* gcc.target/i386/neg-zext-2.c: New test case for -m64.
This commit is contained in:
Roger Sayle 2022-05-24 15:18:56 +01:00
parent 793f847ba7
commit e8a25550da
3 changed files with 54 additions and 0 deletions
gcc
config/i386
testsuite/gcc.target/i386

@ -11040,6 +11040,46 @@
(clobber (reg:CC FLAGS_REG))])]
"ix86_expand_clear (operands[0]);")
;; Convert:
;; xorl %edx, %edx
;; negl %eax
;; adcl $0, %edx
;; negl %edx
;; to:
;; negl %eax
;; sbbl %edx, %edx // *x86_mov<mode>cc_0_m1
(define_peephole2
[(parallel
[(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (reg:CCC FLAGS_REG)
(ne:CCC (match_operand:SWI48 1 "general_reg_operand") (const_int 0)))
(set (match_dup 1) (neg:SWI48 (match_dup 1)))])
(parallel
[(set (match_dup 0)
(plus:SWI48 (plus:SWI48
(ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
(match_dup 0))
(const_int 0)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 0)
(neg:SWI48 (match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
"REGNO (operands[0]) != REGNO (operands[1])"
[(parallel
[(set (reg:CCC FLAGS_REG)
(ne:CCC (match_dup 1) (const_int 0)))
(set (match_dup 1) (neg:SWI48 (match_dup 1)))])
(parallel
[(set (match_dup 0)
(if_then_else:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
(const_int -1)
(const_int 0)))
(clobber (reg:CC FLAGS_REG))])])
(define_insn "*neg<mode>_1"
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
(neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")))

@ -0,0 +1,7 @@
/* { dg-do compile { target ia32 } } */
/* { dg-options "-O2" } */
long long foo(unsigned int x) { return -(long long)x; }
/* { dg-final { scan-assembler "sbb" } } */
/* { dg-final { scan-assembler-not "adc" } } */

@ -0,0 +1,7 @@
/* { dg-do compile { target int128 } } */
/* { dg-options "-O2" } */
__int128 fool(unsigned long x) { return -(__int128)x; }
/* { dg-final { scan-assembler "sbb" } } */
/* { dg-final { scan-assembler-not "adc" } } */