mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-01-11 10:14:36 +08:00
Add post-reload splitter for extendditi2 on x86_64.
This is another step towards a possible solution for PR 105137. This patch introduces a define_insn for extendditi2 that allows DImode to TImode sign-extension to be represented in the early RTL optimizers, before being split post-reload into the exact same idiom as currently produced by RTL expansion. Typically this produces the identical code, so the first new test case: __int128 foo(long long x) { return (__int128)x; } continues to generate: foo: movq %rdi, %rax cqto ret The "magic" is that this representation allows combine and the other RTL optimizers to do a better job. Hence, the second test case: __int128 foo(__int128 a, long long b) { a += ((__int128)b) << 70; return a; } which mainline with -O2 currently generates as: foo: movq %rsi, %rax movq %rdx, %rcx movq %rdi, %rsi salq $6, %rcx movq %rax, %rdi xorl %eax, %eax movq %rcx, %rdx addq %rsi, %rax adcq %rdi, %rdx ret with this patch now becomes: foo: movl $0, %eax salq $6, %rdx addq %rdi, %rax adcq %rsi, %rdx ret i.e. the same code for the signed and unsigned extension variants. 2023-01-01 Roger Sayle <roger@nextmovesoftware.com> Uroš Bizjak <ubizjak@gmail.com> gcc/ChangeLog * config/i386/i386.md (extendditi2): New define_insn. (define_split): Use DWIH mode iterator to treat new extendditi2 identically to existing extendsidi2_1. (define_peephole2): Likewise. (define_peephole2): Likewise. (define_split): Likewise. gcc/testsuite/ChangeLog * gcc.target/i386/extendditi2-1.c: New test case. * gcc.target/i386/extendditi2-2.c: Likewise.
This commit is contained in:
parent
d64f877906
commit
4f1314f547
@ -4548,17 +4548,27 @@
|
||||
"!TARGET_64BIT"
|
||||
"#")
|
||||
|
||||
(define_insn "extendditi2"
|
||||
[(set (match_operand:TI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
|
||||
(sign_extend:TI (match_operand:DI 1 "register_operand" "0,0,r,r")))
|
||||
(clobber (reg:CC FLAGS_REG))
|
||||
(clobber (match_scratch:DI 2 "=X,X,X,&r"))]
|
||||
"TARGET_64BIT"
|
||||
"#")
|
||||
|
||||
;; Split the memory case. If the source register doesn't die, it will stay
|
||||
;; this way; if it does die, the following peephole2s take care of it.
|
||||
(define_split
|
||||
[(set (match_operand:DI 0 "memory_operand")
|
||||
(sign_extend:DI (match_operand:SI 1 "register_operand")))
|
||||
[(set (match_operand:<DWI> 0 "memory_operand")
|
||||
(sign_extend:<DWI> (match_operand:DWIH 1 "register_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))
|
||||
(clobber (match_operand:SI 2 "register_operand"))]
|
||||
(clobber (match_operand:DWIH 2 "register_operand"))]
|
||||
"reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
|
||||
rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
|
||||
|
||||
split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
|
||||
|
||||
emit_move_insn (operands[3], operands[1]);
|
||||
|
||||
@ -4567,12 +4577,12 @@
|
||||
&& REGNO (operands[1]) == AX_REG
|
||||
&& REGNO (operands[2]) == DX_REG)
|
||||
{
|
||||
emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31)));
|
||||
emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[1], bits));
|
||||
}
|
||||
else
|
||||
{
|
||||
emit_move_insn (operands[2], operands[1]);
|
||||
emit_insn (gen_ashrsi3_cvt (operands[2], operands[2], GEN_INT (31)));
|
||||
emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[2], bits));
|
||||
}
|
||||
emit_move_insn (operands[4], operands[2]);
|
||||
DONE;
|
||||
@ -4581,69 +4591,79 @@
|
||||
;; Peepholes for the case where the source register does die, after
|
||||
;; being split with the above splitter.
|
||||
(define_peephole2
|
||||
[(set (match_operand:SI 0 "memory_operand")
|
||||
(match_operand:SI 1 "general_reg_operand"))
|
||||
(set (match_operand:SI 2 "general_reg_operand") (match_dup 1))
|
||||
[(set (match_operand:DWIH 0 "memory_operand")
|
||||
(match_operand:DWIH 1 "general_reg_operand"))
|
||||
(set (match_operand:DWIH 2 "general_reg_operand") (match_dup 1))
|
||||
(parallel [(set (match_dup 2)
|
||||
(ashiftrt:SI (match_dup 2) (const_int 31)))
|
||||
(ashiftrt:DWIH (match_dup 2)
|
||||
(match_operand 4 "const_int_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(set (match_operand:SI 3 "memory_operand") (match_dup 2))]
|
||||
(set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
|
||||
"REGNO (operands[1]) != REGNO (operands[2])
|
||||
&& INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
|
||||
&& peep2_reg_dead_p (2, operands[1])
|
||||
&& peep2_reg_dead_p (4, operands[2])
|
||||
&& !reg_mentioned_p (operands[2], operands[3])"
|
||||
[(set (match_dup 0) (match_dup 1))
|
||||
(parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
|
||||
(parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(set (match_dup 3) (match_dup 1))])
|
||||
|
||||
(define_peephole2
|
||||
[(set (match_operand:SI 0 "memory_operand")
|
||||
(match_operand:SI 1 "general_reg_operand"))
|
||||
(parallel [(set (match_operand:SI 2 "general_reg_operand")
|
||||
(ashiftrt:SI (match_dup 1) (const_int 31)))
|
||||
[(set (match_operand:DWIH 0 "memory_operand")
|
||||
(match_operand:DWIH 1 "general_reg_operand"))
|
||||
(parallel [(set (match_operand:DWIH 2 "general_reg_operand")
|
||||
(ashiftrt:DWIH (match_dup 1)
|
||||
(match_operand 4 "const_int_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(set (match_operand:SI 3 "memory_operand") (match_dup 2))]
|
||||
(set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
|
||||
"/* cltd is shorter than sarl $31, %eax */
|
||||
!optimize_function_for_size_p (cfun)
|
||||
&& REGNO (operands[1]) == AX_REG
|
||||
&& REGNO (operands[2]) == DX_REG
|
||||
&& INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
|
||||
&& peep2_reg_dead_p (2, operands[1])
|
||||
&& peep2_reg_dead_p (3, operands[2])
|
||||
&& !reg_mentioned_p (operands[2], operands[3])"
|
||||
[(set (match_dup 0) (match_dup 1))
|
||||
(parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
|
||||
(parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(set (match_dup 3) (match_dup 1))])
|
||||
|
||||
;; Extend to register case. Optimize case where source and destination
|
||||
;; registers match and cases where we can use cltd.
|
||||
(define_split
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(sign_extend:DI (match_operand:SI 1 "register_operand")))
|
||||
[(set (match_operand:<DWI> 0 "register_operand")
|
||||
(sign_extend:<DWI> (match_operand:DWIH 1 "register_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))
|
||||
(clobber (match_scratch:SI 2))]
|
||||
(clobber (match_scratch:DWIH 2))]
|
||||
"reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
|
||||
rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
|
||||
|
||||
split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
|
||||
|
||||
if (REGNO (operands[3]) != REGNO (operands[1]))
|
||||
emit_move_insn (operands[3], operands[1]);
|
||||
|
||||
rtx src = operands[1];
|
||||
if (REGNO (operands[3]) == AX_REG)
|
||||
src = operands[3];
|
||||
|
||||
/* Generate a cltd if possible and doing so is profitable. */
|
||||
if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
|
||||
&& REGNO (operands[3]) == AX_REG
|
||||
&& REGNO (src) == AX_REG
|
||||
&& REGNO (operands[4]) == DX_REG)
|
||||
{
|
||||
emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31)));
|
||||
emit_insn (gen_ashr<mode>3_cvt (operands[4], src, bits));
|
||||
DONE;
|
||||
}
|
||||
|
||||
if (REGNO (operands[4]) != REGNO (operands[1]))
|
||||
emit_move_insn (operands[4], operands[1]);
|
||||
|
||||
emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31)));
|
||||
emit_insn (gen_ashr<mode>3_cvt (operands[4], operands[4], bits));
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
8
gcc/testsuite/gcc.target/i386/extendditi2-1.c
Normal file
8
gcc/testsuite/gcc.target/i386/extendditi2-1.c
Normal file
@ -0,0 +1,8 @@
|
||||
/* { dg-do compile { target int128 } } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
__int128 foo(long long x)
|
||||
{
|
||||
return (__int128)x;
|
||||
}
|
||||
/* { dg-final { scan-assembler "cqt?o" } } */
|
13
gcc/testsuite/gcc.target/i386/extendditi2-2.c
Normal file
13
gcc/testsuite/gcc.target/i386/extendditi2-2.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile { target int128 } } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
__int128 foo(__int128 a, long long b) {
|
||||
a += ((__int128)b) << 70;
|
||||
return a;
|
||||
}
|
||||
|
||||
__int128 bar(__int128 a, unsigned long long b) {
|
||||
a += ((__int128)b) << 70;
|
||||
return a;
|
||||
}
|
||||
/* { dg-final { scan-assembler-not "movq" } } */
|
Loading…
Reference in New Issue
Block a user