Optimize signed DImode -> TImode on power10.

On power10, GCC tries to optimize the signed conversion from DImode to
TImode by using the vextsd2q instruction.  However to generate this
instruction, it would have to generate 3 direct moves (1 from the GPR
registers to the altivec registers, and 2 from the altivec registers to
the GPR register).

This patch generates the shift right immediate instruction to do the
conversion if the target/source registers ares GPR registers like it does
on earlier systems.  If the target/source registers are Altivec registers,
it will generate the vextsd2q instruction.

2022-03-05   Michael Meissner  <meissner@linux.ibm.com>

gcc/
	PR target/104698
	* config/rs6000/vsx.md (UNSPEC_MTVSRD_DITI_W1): Delete.
	(mtvsrdd_diti_w1): Delete.
	(extendditi2): Convert from define_expand to
	define_insn_and_split.  Replace with code to deal with both GPR
	registers and with altivec registers.

gcc/testsuite/
	PR target/104698
	* gcc.target/powerpc/pr104698-1.c: New test.
	* gcc.target/powerpc/pr104698-2.c: New test.
This commit is contained in:
Michael Meissner 2022-03-05 00:01:52 -05:00
parent 8d96e14c1d
commit 1301d7f647
3 changed files with 124 additions and 22 deletions

View File

@ -360,7 +360,6 @@
UNSPEC_XXGENPCV
UNSPEC_MTVSBM
UNSPEC_EXTENDDITI2
UNSPEC_MTVSRD_DITI_W1
UNSPEC_VCNTMB
UNSPEC_VEXPAND
UNSPEC_VEXTRACT
@ -5023,15 +5022,67 @@
DONE;
})
;; ISA 3.1 vector sign extend
;; Move DI value from GPR to TI mode in VSX register, word 1.
(define_insn "mtvsrdd_diti_w1"
[(set (match_operand:TI 0 "register_operand" "=wa")
(unspec:TI [(match_operand:DI 1 "register_operand" "r")]
UNSPEC_MTVSRD_DITI_W1))]
"TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
"mtvsrdd %x0,0,%1"
[(set_attr "type" "vecmove")])
;; Sign extend DI to TI. We provide both GPR targets and Altivec targets on
;; power10. On earlier systems, the machine independent code will generate a
;; shift left to sign extend the 64-bit value to 128-bit.
;;
;; If the register allocator prefers to use GPR registers, we will use a shift
;; left instruction to sign extend the 64-bit value to 128-bit.
;;
;; If the register allocator prefers to use Altivec registers on power10,
;; generate the vextsd2q instruction.
(define_insn_and_split "extendditi2"
[(set (match_operand:TI 0 "register_operand" "=r,r,v,v,v")
(sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,r,wa,Z")))
(clobber (reg:DI CA_REGNO))]
"TARGET_POWERPC64 && TARGET_POWER10"
"#"
"&& reload_completed"
[(pc)]
{
rtx dest = operands[0];
rtx src = operands[1];
int dest_regno = reg_or_subregno (dest);
/* Handle conversion to GPR registers. Load up the low part and then do
a sign extension to the upper part. */
if (INT_REGNO_P (dest_regno))
{
rtx dest_hi = gen_highpart (DImode, dest);
rtx dest_lo = gen_lowpart (DImode, dest);
emit_move_insn (dest_lo, src);
/* In case src is a MEM, we have to use the destination, which is a
register, instead of re-using the source. */
rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo;
emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63)));
DONE;
}
/* For conversion to an Altivec register, generate either a splat operation
or a load rightmost double word instruction. Both instructions gets the
DImode value into the lower 64 bits, and then do the vextsd2q
instruction. */
else if (ALTIVEC_REGNO_P (dest_regno))
{
if (MEM_P (src))
emit_insn (gen_vsx_lxvrdx (dest, src));
else
{
rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
emit_insn (gen_vsx_splat_v2di (dest_v2di, src));
}
emit_insn (gen_extendditi2_vector (dest, dest));
DONE;
}
else
gcc_unreachable ();
}
[(set_attr "length" "8")
(set_attr "type" "shift,load,vecmove,vecperm,load")])
;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
(define_insn "extendditi2_vector"
@ -5042,18 +5093,6 @@
"vextsd2q %0,%1"
[(set_attr "type" "vecexts")])
(define_expand "extendditi2"
[(set (match_operand:TI 0 "gpc_reg_operand")
(sign_extend:DI (match_operand:DI 1 "gpc_reg_operand")))]
"TARGET_POWER10"
{
/* Move 64-bit src from GPR to vector reg and sign extend to 128-bits. */
rtx temp = gen_reg_rtx (TImode);
emit_insn (gen_mtvsrdd_diti_w1 (temp, operands[1]));
emit_insn (gen_extendditi2_vector (operands[0], temp));
DONE;
})
;; ISA 3.0 Binary Floating-Point Support

View File

@ -0,0 +1,30 @@
/* { dg-require-effective-target int128 } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
/* PR target/104698 involved a regression where on power10, conversion from
long long to __int128_t generated mtvsrdd, vextsd2q, mfvsrd, and mfvsrld
instructions instead of just a GPR sign extension. This test makes sure the
result is kept in the GPR registers. */
__int128_t convert_1 (long long a)
{
return a; /* sradi. */
}
/* Like convert_1, but make sure a normal offsettable load is done. The
pattern in vsx.md has support for generating lxvdsx if it is coming from
memory. Make sure when the gpr is used, a normal load with offset is still
done. */
__int128_t convert_2 (long long *p)
{
return p[2]; /* ld and sradi. */
}
/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
/* { dg-final { scan-assembler-not {\mmfvsrld\M} } } */
/* { dg-final { scan-assembler-not {\mmtvsrdd\M} } } */
/* { dg-final { scan-assembler-not {\mvextsd2q\M} } } */
/* { dg-final { scan-assembler-times {\mld\M} 1 } } */
/* { dg-final { scan-assembler-times {\msradi\M} 2 } } */

View File

@ -0,0 +1,33 @@
/* { dg-require-effective-target int128 } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
/* PR target/104694 involved GCC generating vextsd2q to convent long long to
__int128_t when the long long value was in the GPR register. This test
verifies that if the result is in the Altivec registers, we still want to
generate vextsd2q. We use __int128_t to indicate that we want the result of
the conversion to be in an Altivec register. */
void do_div_1 (__int128_t *p, __int128_t *q, long long r)
{
*p = *q / r; /* mtvsrdd, vextsd2q, vdivsq. */
}
/* Test the optimization in vsx.md to use lxvrdx instead of ld and mtvsrdd if
the value is coming from memory. */
void do_div_2 (__int128_t *p, __int128_t *q, long long *r)
{
*p = *q / r[2]; /* lxvrdx, vextsd2q, vdivsq. */
}
/* { dg-final { scan-assembler-not {\mld\M} } } */
/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
/* { dg-final { scan-assembler-not {\mmfvsrld\M} } } */
/* { dg-final { scan-assembler-not {\msradi\M} } } */
/* { dg-final { scan-assembler-times {\mlxv\M} 2 } } */
/* { dg-final { scan-assembler-times {\mlxvrdx\M} 1 } } */
/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */
/* { dg-final { scan-assembler-times {\mstxv\M} 2 } } */
/* { dg-final { scan-assembler-times {\mvdivsq\M} 2 } } */
/* { dg-final { scan-assembler-times {\mvextsd2q\M} 2 } } */