nvptx: Use cvt to perform sign-extension of truncation

This patch introduces some new define_insn rules to the nvptx backend,
to perform sign-extension of a truncation (from and to the same mode),
using a single cvt instruction.  As an example, the following function

int foo(int x) { return (char)x; }

with -O2 currently generates:

	mov.u32 %r24, %ar0;
	mov.u32 %r26, %r24;
	cvt.s32.s8      %value, %r26;

and with this patch, now generates:

	mov.u32 %r24, %ar0;
	cvt.s32.s8      %value, %r24;

This patch has been tested on nvptx-none hosted by x86_64-pc-linux-gnu
with a top-level "make" (including newlib) and a "make check" with no
new regressions.

gcc/ChangeLog:

	* config/nvptx/nvptx.md (*extend_trunc_<mode>2_qi,
	*extend_trunc_<mode>2_hi, *extend_trunc_di2_si): New insns.
	Use cvt to perform sign-extension of truncation in one step.

gcc/testsuite/ChangeLog:

	* gcc.target/nvptx/exttrunc-2.c: New test case.
	* gcc.target/nvptx/exttrunc-3.c: New test case.
	* gcc.target/nvptx/exttrunc-4.c: New test case.
	* gcc.target/nvptx/exttrunc-5.c: New test case.
	* gcc.target/nvptx/exttrunc-6.c: New test case.
This commit is contained in:
Roger Sayle 2021-12-08 12:45:38 +01:00 committed by Tom de Vries
parent d3d44a00e5
commit 6b49d50a27
6 changed files with 111 additions and 0 deletions

View File

@ -401,6 +401,32 @@
%.\\tst%A0.u%T0\\t%0, %1;"
[(set_attr "subregs_ok" "true")])
;; Sign-extensions of truncations
;; Sign-extend the low 8 bits of a register back to the register's own mode.
;; The RTL template matches (sign_extend:M (truncate:QI (reg:M))), where the
;; source and destination share the same mode M drawn from the HSDIM mode
;; iterator (defined elsewhere in this file; presumably HI, SI and DI).
;; The leading '*' keeps the pattern from being used as a named expander.
(define_insn "*extend_trunc_<mode>2_qi"
[(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
(sign_extend:HSDIM
(truncate:QI (match_operand:HSDIM 1 "nvptx_register_operand" "R"))))]
;; Empty condition: the pattern is always available.
""
;; One cvt from s8 to the destination type; %T0 is expected to print the
;; bit size of operand 0's mode (e.g. "cvt.s32.s8" for an SImode result).
"%.\\tcvt.s%T0.s8\\t%0, %1;"
[(set_attr "subregs_ok" "true")])
;; Sign-extend the low 16 bits of a register back to the register's own mode.
;; The RTL template matches (sign_extend:M (truncate:HI (reg:M))), where the
;; source and destination share the same mode M drawn from the SDIM mode
;; iterator (defined elsewhere in this file; presumably SI and DI).
;; The leading '*' keeps the pattern from being used as a named expander.
(define_insn "*extend_trunc_<mode>2_hi"
[(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
(sign_extend:SDIM
(truncate:HI (match_operand:SDIM 1 "nvptx_register_operand" "R"))))]
;; Empty condition: the pattern is always available.
""
;; One cvt from s16 to the destination type; %T0 is expected to print the
;; bit size of operand 0's mode (e.g. "cvt.s32.s16" for an SImode result).
"%.\\tcvt.s%T0.s16\\t%0, %1;"
[(set_attr "subregs_ok" "true")])
;; Sign-extend the low 32 bits of a DImode register back to DImode.
;; The RTL template matches (sign_extend:DI (truncate:SI (reg:DI))).  Only
;; DImode is handled, so the modes and the output template are spelled out
;; rather than using a mode iterator.
;; The leading '*' keeps the pattern from being used as a named expander.
(define_insn "*extend_trunc_di2_si"
[(set (match_operand:DI 0 "nvptx_register_operand" "=R")
(sign_extend:DI
(truncate:SI (match_operand:DI 1 "nvptx_register_operand" "R"))))]
;; Empty condition: the pattern is always available.
""
;; One cvt from s32 to s64, reading the 64-bit source register directly.
"%.\\tcvt.s64.s32\\t%0, %1;"
[(set_attr "subregs_ok" "true")])
;; Integer arithmetic
(define_insn "add<mode>3"

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Narrow X to char and return it widened back to int.  The scan
   directives in this file expect the round trip to be emitted as one
   cvt.s32.s8 fed by a single mov from the incoming argument.  */
int exttrunc_si2_qi(int x)
{
return (char)x;
}
/* Match:
mov.u32 %r24, %ar0;
cvt.s32.s8 %value, %r24; */
/* { dg-final { scan-assembler-times "mov\.u32\t%r\[0-9\]*, %ar0" 1 } } */
/* { dg-final { scan-assembler-times "mov\." 1 } } */
/* { dg-final { scan-assembler-times "cvt\.s32\.s8" 1 } } */
/* { dg-final { scan-assembler-times "cvt\." 1 } } */

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Narrow X to char and return it widened back to long.  The scan
   directives in this file expect the round trip to be emitted as one
   cvt.s64.s8 fed by a single mov from the incoming argument.  */
long exttrunc_di2_qi(long x)
{
return (char)x;
}
/* Match:
mov.u64 %r24, %ar0;
cvt.s64.s8 %value, %r24; */
/* { dg-final { scan-assembler-times "mov\.u64\t%r\[0-9\]*, %ar0" 1 } } */
/* { dg-final { scan-assembler-times "mov\." 1 } } */
/* { dg-final { scan-assembler-times "cvt\.s64\.s8" 1 } } */
/* { dg-final { scan-assembler-times "cvt\." 1 } } */

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Narrow X to short and return it widened back to int.  The scan
   directives in this file expect the round trip to be emitted as one
   cvt.s32.s16 fed by a single mov from the incoming argument.  */
int exttrunc_si2_hi(int x)
{
return (short)x;
}
/* Match:
mov.u32 %r24, %ar0;
cvt.s32.s16 %value, %r24; */
/* { dg-final { scan-assembler-times "mov\.u32\t%r\[0-9\]*, %ar0" 1 } } */
/* { dg-final { scan-assembler-times "mov\." 1 } } */
/* { dg-final { scan-assembler-times "cvt\.s32\.s16" 1 } } */
/* { dg-final { scan-assembler-times "cvt\." 1 } } */

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Narrow X to short and return it widened back to long.  The scan
   directives in this file expect the round trip to be emitted as one
   cvt.s64.s16 fed by a single mov from the incoming argument.  */
long exttrunc_di2_hi(long x)
{
return (short)x;
}
/* Match:
mov.u64 %r24, %ar0;
cvt.s64.s16 %value, %r24; */
/* { dg-final { scan-assembler-times "mov\.u64\t%r\[0-9\]*, %ar0" 1 } } */
/* { dg-final { scan-assembler-times "mov\." 1 } } */
/* { dg-final { scan-assembler-times "cvt\.s64\.s16" 1 } } */
/* { dg-final { scan-assembler-times "cvt\." 1 } } */

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Narrow X to int and return it widened back to long.  The scan
   directives in this file expect the round trip to be emitted as one
   cvt.s64.s32 fed by a single mov from the incoming argument.  */
long exttrunc_di2_si(long x)
{
return (int)x;
}
/* Match:
mov.u64 %r24, %ar0;
cvt.s64.s32 %value, %r24; */
/* { dg-final { scan-assembler-times "mov\.u64\t%r\[0-9\]*, %ar0" 1 } } */
/* { dg-final { scan-assembler-times "mov\." 1 } } */
/* { dg-final { scan-assembler-times "cvt\.s64\.s32" 1 } } */
/* { dg-final { scan-assembler-times "cvt\." 1 } } */