From 6c2848ad02feef5ac094d1158be3861819b3bb49 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Wed, 15 Jul 2020 21:27:00 +0200
Subject: [PATCH] i386: Introduce peephole2 to use flags from CMPXCHG more [PR96189]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The CMPXCHG instruction sets the ZF flag if the values in the destination
operand and the EAX register are equal; otherwise the ZF flag is cleared
and the value from the destination operand is loaded to EAX.  The
following assembly:

        movl    %esi, %eax
        lock cmpxchgl   %edx, (%rdi)
        cmpl    %esi, %eax
        sete    %al

can be optimized by removing the unneeded comparison, since a set ZF flag
signals that no update to EAX happened.

2020-07-15  Uroš Bizjak

gcc/ChangeLog:

	PR target/96189
	* config/i386/sync.md
	(peephole2 to remove unneeded compare after CMPXCHG):
	New pattern.

gcc/testsuite/ChangeLog:

	PR target/96189
	* gcc.target/i386/pr96189.c: New test.
---
 gcc/config/i386/sync.md                 | 35 +++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr96189.c | 12 +++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr96189.c

diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 9ab5456b2274..d203e9d1ecba 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -594,6 +594,41 @@
   "TARGET_CMPXCHG"
   "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
 
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+        (match_operand:SWI 1 "general_operand"))
+   (parallel [(set (match_dup 0)
+                   (unspec_volatile:SWI
+                     [(match_operand:SWI 2 "memory_operand")
+                      (match_dup 0)
+                      (match_operand:SWI 3 "register_operand")
+                      (match_operand:SI 4 "const_int_operand")]
+                     UNSPECV_CMPXCHG))
+              (set (match_dup 2)
+                   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+              (set (reg:CCZ FLAGS_REG)
+                   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])
+   (set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (match_operand:SWI 5 "register_operand")
+                     (match_operand:SWI 6 "general_operand")))]
+  "(rtx_equal_p (operands[0], operands[5])
+    && rtx_equal_p (operands[1], operands[6]))
+   || (rtx_equal_p (operands[0], operands[6])
+       && rtx_equal_p (operands[1], operands[5]))"
+  [(set (match_dup 0)
+        (match_dup 1))
+   (parallel [(set (match_dup 0)
+                   (unspec_volatile:SWI
+                     [(match_dup 2)
+                      (match_dup 0)
+                      (match_dup 3)
+                      (match_dup 4)]
+                     UNSPECV_CMPXCHG))
+              (set (match_dup 2)
+                   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+              (set (reg:CCZ FLAGS_REG)
+                   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])
+
 ;; For operand 2 nonmemory_operand predicate is used instead of
 ;; register_operand to allow combiner to better optimize atomic
 ;; additions of constants.
diff --git a/gcc/testsuite/gcc.target/i386/pr96189.c b/gcc/testsuite/gcc.target/i386/pr96189.c
new file mode 100644
index 000000000000..1505e483b940
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr96189.c
@@ -0,0 +1,12 @@
+/* PR target/96189 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\tcmpb\t" } } */
+
+_Bool
+foo (unsigned char *x, unsigned char y, unsigned char z)
+{
+  unsigned char y_old = y;
+  __atomic_compare_exchange_n (x, &y, z, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+  return y == y_old;
+}