mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-10 13:31:04 +08:00
[nvptx] Handle V2DI/V2SI mode in nvptx_gen_shuffle
With the pr96628-part1.f90 source and -ftree-slp-vectorize, we run into an ICE because V2DI mode is not handled in nvptx_gen_shuffle.

Fix this by adding handling of V2DI as well as V2SI mode in nvptx_gen_shuffle.

Built and reg-tested on x86_64 with nvptx accelerator.

gcc/ChangeLog:

	PR target/96428
	* config/nvptx/nvptx.c (nvptx_gen_shuffle): Handle V2SI/V2DI.

libgomp/ChangeLog:

	PR target/96428
	* testsuite/libgomp.oacc-fortran/pr96628-part1.f90: New test.
	* testsuite/libgomp.oacc-fortran/pr96628-part2.f90: New test.
This commit is contained in:
parent
95f5a3258d
commit
344f09a756
@ -1796,6 +1796,44 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
|
||||
end_sequence ();
|
||||
}
|
||||
break;
|
||||
case E_V2SImode:
|
||||
{
|
||||
rtx src0 = gen_rtx_SUBREG (SImode, src, 0);
|
||||
rtx src1 = gen_rtx_SUBREG (SImode, src, 4);
|
||||
rtx dst0 = gen_rtx_SUBREG (SImode, dst, 0);
|
||||
rtx dst1 = gen_rtx_SUBREG (SImode, dst, 4);
|
||||
rtx tmp0 = gen_reg_rtx (SImode);
|
||||
rtx tmp1 = gen_reg_rtx (SImode);
|
||||
start_sequence ();
|
||||
emit_insn (gen_movsi (tmp0, src0));
|
||||
emit_insn (gen_movsi (tmp1, src1));
|
||||
emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind));
|
||||
emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
|
||||
emit_insn (gen_movsi (dst0, tmp0));
|
||||
emit_insn (gen_movsi (dst1, tmp1));
|
||||
res = get_insns ();
|
||||
end_sequence ();
|
||||
}
|
||||
break;
|
||||
case E_V2DImode:
|
||||
{
|
||||
rtx src0 = gen_rtx_SUBREG (DImode, src, 0);
|
||||
rtx src1 = gen_rtx_SUBREG (DImode, src, 8);
|
||||
rtx dst0 = gen_rtx_SUBREG (DImode, dst, 0);
|
||||
rtx dst1 = gen_rtx_SUBREG (DImode, dst, 8);
|
||||
rtx tmp0 = gen_reg_rtx (DImode);
|
||||
rtx tmp1 = gen_reg_rtx (DImode);
|
||||
start_sequence ();
|
||||
emit_insn (gen_movdi (tmp0, src0));
|
||||
emit_insn (gen_movdi (tmp1, src1));
|
||||
emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind));
|
||||
emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
|
||||
emit_insn (gen_movdi (dst0, tmp0));
|
||||
emit_insn (gen_movdi (dst1, tmp1));
|
||||
res = get_insns ();
|
||||
end_sequence ();
|
||||
}
|
||||
break;
|
||||
case E_BImode:
|
||||
{
|
||||
rtx tmp = gen_reg_rtx (SImode);
|
||||
|
20
libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90
Normal file
20
libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90
Normal file
@ -0,0 +1,20 @@
|
||||
! { dg-do run }
|
||||
! { dg-additional-sources pr96628-part2.f90 }
|
||||
! { dg-additional-options "-ftree-slp-vectorize" }
|
||||
!
|
||||
! This file is compiled first
|
||||
!> Helper module for the OpenACC test: holds a device-resident accumulator
!> and two accelerator routines that operate on a caller-supplied array.
module m2
  implicit none

  ! Total computed by `one` and consumed by `two`.
  real(kind=8) :: mysum
  !$acc declare device_resident(mysum)

contains

  !> Store the sum of all elements of `t` in the module variable `mysum`.
  subroutine one(t)
    !$acc routine
    real(kind=8), intent(in) :: t(:)

    mysum = sum(t)
  end subroutine one

  !> Scale `t` in place to 100*t/mysum.
  subroutine two(t)
    !$acc routine seq
    real(kind=8), intent(inout) :: t(:)

    ! Divide by the stored total `mysum`.  A bare `sum` (no argument list)
    ! would not reference the intrinsic; without `implicit none` it would be
    ! an implicitly typed, undefined local variable.
    t = (100.0_8*t)/mysum
  end subroutine two

end module m2
|
37
libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90
Normal file
37
libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90
Normal file
@ -0,0 +1,37 @@
|
||||
! { dg-do compile { target skip-all-targets } }
|
||||
!
|
||||
! Main file is pr96628-part1.f90
|
||||
|
||||
!> Driver module for the OpenACC test: owns the work array `t` and the
!> offloaded region that calls the routines from module `m2` on it.
module m
  implicit none

  real(kind=8), allocatable :: t(:)

contains

  !> Run `one` and `two` on the whole of `t` inside a gang/vector loop nest.
  !> The enclosing data region requires `t` to be present on the device.
  subroutine run()
    use m2, only: one, two
    implicit none

    integer :: i, j  ! loop indices

    !$acc data present(t)
    !$acc parallel
    !$acc loop gang
    do j = 1, 2
      !$acc loop vector
      do i = 1, 2
        call one(t(:))
        call two(t(:))
      end do
    end do
    !$acc end parallel
    !$acc end data
  end subroutine run

end module m
|
||||
|
||||
!> Test driver: fill `t` with 3*i (i = 1..100), run the offloaded kernel
!> inside a copy data region, and verify the result against the reference.
program check_scaled_t
  use m, only: t, run
  implicit none

  integer :: i

  ! Assignment to the unallocated allocatable allocates t with 100 elements.
  t = [(3.0_8*i, i = 1, 100)]

  !$acc data copy(t)
  call run
  !$acc end data

  ! Reference values are (300*i)/15150, i.e. 100*t(i) divided by the sum of
  ! the initial t (3 * 5050 = 15150).  Fail when any element deviates from
  ! the reference by MORE than the tolerance; the comparison must be `>`,
  ! since `<` would report failure precisely when a result is correct.
  if (any (abs(t - [((300.0_8*i)/15150.0_8, i = 1, 100)]) > 10.0_8*epsilon(t))) stop 1
end program check_scaled_t
|
Loading…
x
Reference in New Issue
Block a user