re PR target/48986 (Missed optimization in atomic decrement on x86/x64)

PR target/48986
	* config/i386/sync.md (sync_old_add<mode>): Relax operand 2
	predicate to allow CONST_INT.
	(*sync_old_add_cmp<mode>): New insn and peephole2 for it.

From-SVN: r173816
Jakub Jelinek 2011-05-17 09:37:59 +02:00 committed by Jakub Jelinek
parent 067e2f069d
commit 8637e32a7e
2 changed files with 66 additions and 2 deletions

--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog

@@ -1,3 +1,10 @@
+2011-05-17  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/48986
+	* config/i386/sync.md (sync_old_add<mode>): Relax operand 2
+	predicate to allow CONST_INT.
+	(*sync_old_add_cmp<mode>): New insn and peephole2 for it.
+
 2011-05-16  Joseph Myers  <joseph@codesourcery.com>
 
 	* opts-common.c (opt_enum_arg_to_value): New.

--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md

@@ -1,5 +1,5 @@
 ;; GCC machine description for i386 synchronization instructions.
-;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
 ;; Free Software Foundation, Inc.
 ;;
 ;; This file is part of GCC.
@@ -164,17 +164,74 @@
   "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
   "xchg{l}\t%%ebx, %3\;lock{%;} cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3")
 
+;; For operand 2 nonmemory_operand predicate is used instead of
+;; register_operand to allow combiner to better optimize atomic
+;; additions of constants.
 (define_insn "sync_old_add<mode>"
   [(set (match_operand:SWI 0 "register_operand" "=<r>")
         (unspec_volatile:SWI
           [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG))
    (set (match_dup 1)
         (plus:SWI (match_dup 1)
-                  (match_operand:SWI 2 "register_operand" "0")))
+                  (match_operand:SWI 2 "nonmemory_operand" "0")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_XADD"
   "lock{%;} xadd{<imodesuffix>}\t{%0, %1|%1, %0}")
 
+;; This peephole2 and following insn optimize
+;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
+;; followed by testing of flags instead of lock xadd and comparisons.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand" "")
+        (match_operand:SWI 2 "const_int_operand" ""))
+   (parallel [(set (match_dup 0)
+                   (unspec_volatile:SWI
+                     [(match_operand:SWI 1 "memory_operand" "")] UNSPECV_XCHG))
+              (set (match_dup 1)
+                   (plus:SWI (match_dup 1)
+                             (match_dup 0)))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (match_dup 0)
+                     (match_operand:SWI 3 "const_int_operand" "")))]
+  "peep2_reg_dead_p (3, operands[0])
+   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
+      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])"
+  [(parallel [(set (reg:CCZ FLAGS_REG)
+                   (compare:CCZ (unspec_volatile:SWI [(match_dup 1)]
+                                                     UNSPECV_XCHG)
+                                (match_dup 3)))
+              (set (match_dup 1)
+                   (plus:SWI (match_dup 1)
+                             (match_dup 2)))])])
+
+(define_insn "*sync_old_add_cmp<mode>"
+  [(set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (unspec_volatile:SWI
+                       [(match_operand:SWI 0 "memory_operand" "+m")]
+                       UNSPECV_XCHG)
+                     (match_operand:SWI 2 "const_int_operand" "i")))
+   (set (match_dup 0)
+        (plus:SWI (match_dup 0)
+                  (match_operand:SWI 1 "const_int_operand" "i")))]
+  "(unsigned HOST_WIDE_INT) INTVAL (operands[1])
+   == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])"
+{
+  if (TARGET_USE_INCDEC)
+    {
+      if (operands[1] == const1_rtx)
+        return "lock{%;} inc{<imodesuffix>}\t%0";
+      if (operands[1] == constm1_rtx)
+        return "lock{%;} dec{<imodesuffix>}\t%0";
+    }
+
+  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
+    return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
+
+  return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
+})
+
 ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
 (define_insn "sync_lock_test_and_set<mode>"
   [(set (match_operand:SWI 0 "register_operand" "=<r>")
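For context, a minimal C sketch (not part of the patch or the PR testcase; the function and parameter names below are hypothetical) of the __sync_fetch_and_add (x, -N) == N idiom the new peephole2 and *sync_old_add_cmp<mode> target. Before this change such code went through lock xadd followed by a separate comparison; with it, the sequence can be emitted as a single lock add/sub/inc/dec whose flags are tested directly.

/* Hypothetical example of the decrement-and-test idiom from PR target/48986.  */
int
release_ref (int *refcount)
{
  /* Nonzero when this call dropped the count from 1 to 0.  With the
     peephole2 above, the lock xadd + cmp pair can become a single
     lock dec of *refcount whose ZF result is then tested.  */
  return __sync_fetch_and_add (refcount, -1) == 1;
}

The same shape with other constants (for example __sync_fetch_and_add (x, -4) == 4) maps onto the lock sub/add forms chosen via x86_maybe_negate_const_int in the new insn's output code.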