rs6000: clz/ctz/ffs improvement (PR78683)

On CPUs that implement popcnt[wd] but not cnttz[wd] we can do better for
the ctz sequences than we do today.

C[LT]Z_DEFINED_VALUE_AT_ZERO can return 2, since we always return the
same fixed value (only dependent on TARGET_* options).


	PR target/78683
	* config/rs6000/rs6000.h (CLZ_DEFINED_VALUE_AT_ZERO): Use
	GET_MODE_BITSIZE.  Return 2.
	(CTZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE.  Return 2.  Handle
	TARGET_POPCNTD the same as TARGET_CTZ.
	* config/rs6000/rs6000.md (ctz<mode>2): Reimplement.
	(ffs<mode>2): Reimplement.

From-SVN: r243499
This commit is contained in:
Segher Boessenkool 2016-12-09 20:31:06 +01:00 committed by Segher Boessenkool
parent 59ab1319cb
commit bb0f9c0249
3 changed files with 48 additions and 35 deletions

View File

@ -1,3 +1,13 @@
2016-12-09 Segher Boessenkool <segher@kernel.crashing.org>
PR target/78683
* config/rs6000/rs6000.h (CLZ_DEFINED_VALUE_AT_ZERO): Use
GET_MODE_BITSIZE. Return 2.
(CTZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE. Return 2. Handle
TARGET_POPCNTD the same as TARGET_CTZ.
* config/rs6000/rs6000.md (ctz<mode>2): Reimplement.
(ffs<mode>2): Reimplement.
2016-12-09 Andre Vieira <andre.simoesdiasvieira@arm.com>
PR rtl-optimization/78255

View File

@ -2199,14 +2199,15 @@ do { \
/* The cntlzw and cntlzd instructions return 32 and 64 for input of zero. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
((VALUE) = ((MODE) == SImode ? 32 : 64), 1)
((VALUE) = GET_MODE_BITSIZE (MODE), 2)
/* The CTZ patterns that are implemented in terms of CLZ return -1 for input of
zero. The hardware instructions added in Power9 return 32 or 64. */
zero. The hardware instructions added in Power9 and the sequences using
popcount return 32 or 64. */
#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
((!TARGET_CTZ) \
? ((VALUE) = -1, 1) \
: ((VALUE) = ((MODE) == SImode ? 32 : 64), 1))
(TARGET_CTZ || TARGET_POPCNTD \
? ((VALUE) = GET_MODE_BITSIZE (MODE), 2) \
: ((VALUE) = -1, 2))
/* Specify the machine mode that pointers have.
After generation of rtl, the compiler makes no further distinction

View File

@ -2220,17 +2220,8 @@
[(set_attr "type" "cntlz")])
(define_expand "ctz<mode>2"
[(set (match_dup 2)
(neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))
(set (match_dup 3)
(and:GPR (match_dup 1)
(match_dup 2)))
(set (match_dup 4)
(clz:GPR (match_dup 3)))
(parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "")
(minus:GPR (match_dup 5)
(match_dup 4)))
(clobber (reg:GPR CA_REGNO))])]
[(set (match_operand:GPR 0 "gpc_reg_operand")
(ctz:GPR (match_operand:GPR 1 "gpc_reg_operand")))]
""
{
if (TARGET_CTZ)
@ -2239,10 +2230,26 @@
DONE;
}
operands[2] = gen_reg_rtx (<MODE>mode);
operands[3] = gen_reg_rtx (<MODE>mode);
operands[4] = gen_reg_rtx (<MODE>mode);
operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
rtx tmp1 = gen_reg_rtx (<MODE>mode);
rtx tmp2 = gen_reg_rtx (<MODE>mode);
rtx tmp3 = gen_reg_rtx (<MODE>mode);
if (TARGET_POPCNTD)
{
emit_insn (gen_add<mode>3 (tmp1, operands[1], constm1_rtx));
emit_insn (gen_one_cmpl<mode>2 (tmp2, operands[1]));
emit_insn (gen_and<mode>3 (tmp3, tmp1, tmp2));
emit_insn (gen_popcntd<mode>2 (operands[0], tmp3));
}
else
{
emit_insn (gen_neg<mode>2 (tmp1, operands[1]));
emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1));
emit_insn (gen_clz<mode>2 (tmp3, tmp2));
emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits> - 1), tmp3));
}
DONE;
})
(define_insn "ctz<mode>2_hw"
@ -2253,23 +2260,18 @@
[(set_attr "type" "cntlz")])
(define_expand "ffs<mode>2"
[(set (match_dup 2)
(neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))
(set (match_dup 3)
(and:GPR (match_dup 1)
(match_dup 2)))
(set (match_dup 4)
(clz:GPR (match_dup 3)))
(parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "")
(minus:GPR (match_dup 5)
(match_dup 4)))
(clobber (reg:GPR CA_REGNO))])]
[(set (match_operand:GPR 0 "gpc_reg_operand")
(ffs:GPR (match_operand:GPR 1 "gpc_reg_operand")))]
""
{
operands[2] = gen_reg_rtx (<MODE>mode);
operands[3] = gen_reg_rtx (<MODE>mode);
operands[4] = gen_reg_rtx (<MODE>mode);
operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
rtx tmp1 = gen_reg_rtx (<MODE>mode);
rtx tmp2 = gen_reg_rtx (<MODE>mode);
rtx tmp3 = gen_reg_rtx (<MODE>mode);
emit_insn (gen_neg<mode>2 (tmp1, operands[1]));
emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1));
emit_insn (gen_clz<mode>2 (tmp3, tmp2));
emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits>), tmp3));
DONE;
})