mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-01-10 22:46:22 +08:00
81476bc4f4
This patch adds support for omp_get_max_teams, omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices. That includes the usage of device-specific ICV values (specified as environment variables or changed on a device). In order to reuse device-specific ICV values, a copy back mechanism is implemented that copies ICV values back from device to the host. Additionally, a limitation of the number of teams on gcn offload devices is implemented. The number of teams is limited by twice the number of compute units (one team is executed on one compute unit). This avoids queueing unnessecary many teams and a corresponding allocation of large amounts of memory. Without that limitation the memory allocation for a large number of user-specified teams can result in an "memory access fault". A limitation of the number of teams is already also implemented for nvptx devices (see nvptx_adjust_launch_bounds in libgomp/plugin/plugin-nvptx.c). gcc/ChangeLog: * gimplify.cc (optimize_target_teams): Set initial num_teams_upper to "-2" instead of "1" for non-existing num_teams clause in order to disambiguate from the case of an existing num_teams clause with value 1. libgomp/ChangeLog: * config/gcn/icv-device.c (omp_get_teams_thread_limit): Added to allow processing of device-specific values. (omp_set_teams_thread_limit): Likewise. (ialias): Likewise. * config/nvptx/icv-device.c (omp_get_teams_thread_limit): Likewise. (omp_set_teams_thread_limit): Likewise. (ialias): Likewise. * icv-device.c (omp_get_teams_thread_limit): Likewise. (ialias): Likewise. (omp_set_teams_thread_limit): Likewise. * icv.c (omp_set_teams_thread_limit): Removed. (omp_get_teams_thread_limit): Likewise. (ialias): Likewise. * libgomp.texi: Updated documentation for nvptx and gcn corresponding to the limitation of the number of teams. * plugin/plugin-gcn.c (limit_teams): New helper function that limits the number of teams by twice the number of compute units. (parse_target_attributes): Limit the number of teams on gcn offload devices. * target.c (get_gomp_offload_icvs): Added teams_thread_limit_var handling. (gomp_load_image_to_device): Added a size check for the ICVs struct variable. (gomp_copy_back_icvs): New function that is used in GOMP_target_ext to copy back the ICV values from device to host. (GOMP_target_ext): Update the number of teams and threads in the kernel args also considering device-specific values. * testsuite/libgomp.c-c++-common/icv-4.c: Fixed an error in the reading of OMP_TEAMS_THREAD_LIMIT from the environment. * testsuite/libgomp.c-c++-common/icv-5.c: Extended. * testsuite/libgomp.c-c++-common/icv-6.c: Extended. * testsuite/libgomp.c-c++-common/icv-7.c: Extended. * testsuite/libgomp.c-c++-common/icv-9.c: New test. * testsuite/libgomp.fortran/icv-5.f90: New test. * testsuite/libgomp.fortran/icv-6.f90: New test. gcc/testsuite/ChangeLog: * c-c++-common/gomp/target-teams-1.c: Adapt expected values for num_teams from "1" to "-2" in cases without num_teams clause. * g++.dg/gomp/target-teams-1.C: Likewise. * gfortran.dg/gomp/defaultmap-4.f90: Likewise. * gfortran.dg/gomp/defaultmap-5.f90: Likewise. * gfortran.dg/gomp/defaultmap-6.f90: Likewise.
227 lines
6.7 KiB
Fortran
227 lines
6.7 KiB
Fortran
! { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" }
|
|
! { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" }
|
|
! { dg-set-target-env-var OMP_NUM_TEAMS "5" }
|
|
! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "6" }
|
|
! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "7" }
|
|
! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "8" }
|
|
! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" }
|
|
! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" }
|
|
! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT "4" }
|
|
! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_0 "5" }
|
|
! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_1 "6" }
|
|
! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_2 "7" }
|
|
|
|
use omp_lib
|
|
implicit none (type, external)
|
|
integer :: num_devices, i, large_num_teams, large_threads_limit
|
|
logical :: err
|
|
|
|
if (omp_get_num_devices () > 3) then
|
|
num_devices = 3
|
|
else
|
|
num_devices = omp_get_num_devices ()
|
|
end if
|
|
|
|
do i=0,num_devices-1
|
|
|
|
! Testing NUM_TEAMS.
|
|
if (env_is_set_dev ("OMP_NUM_TEAMS_DEV_", i, 6 + i)) then
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
if (omp_get_max_teams () /= 6 + i) err = .true.
|
|
!$omp end target
|
|
if (err) stop 1
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
!$omp teams
|
|
if (omp_get_num_teams () > 6 + i .or. omp_get_team_num () >= 6 + i) &
|
|
err = .true.
|
|
!$omp end teams
|
|
!$omp end target
|
|
if (err) stop 2
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
call omp_set_num_teams (5 + i)
|
|
if (omp_get_max_teams () /= 5 + i) err = .true.
|
|
!$omp end target
|
|
if (err) stop 3
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
if (omp_get_max_teams () /= 5 + i) err = .true.
|
|
!$omp end target
|
|
if (err) stop 4
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
!$omp teams
|
|
if (omp_get_num_teams () > 5 + i .or. omp_get_team_num () >= 5 + i) &
|
|
err = .true.
|
|
!$omp end teams
|
|
!$omp end target
|
|
if (err) stop 5
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
!$omp teams num_teams(6 + i)
|
|
if (omp_get_num_teams () > 6 + i .or. omp_get_team_num () >= 6 + i) &
|
|
err = .true.
|
|
!$omp end teams
|
|
!$omp end target
|
|
if (err) stop 6
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
!$omp teams num_teams(4 + i)
|
|
if (omp_get_num_teams () > 4 + i .or. omp_get_team_num () >= 4 + i) &
|
|
err = .true.
|
|
!$omp end teams
|
|
!$omp end target
|
|
if (err) stop 7
|
|
|
|
large_num_teams = 66000
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
call omp_set_num_teams (large_num_teams + i)
|
|
if (omp_get_max_teams () /= large_num_teams + i) err = .true.
|
|
!$omp end target
|
|
if (err) stop 8
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
if (omp_get_max_teams () /= large_num_teams + i) err = .true.
|
|
!$omp end target
|
|
if (err) stop 9
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
!$omp teams
|
|
if (omp_get_num_teams () > large_num_teams + i &
|
|
.or. omp_get_team_num () >= large_num_teams + i) err = .true.
|
|
!$omp end teams
|
|
!$omp end target
|
|
if (err) stop 10
|
|
end if
|
|
|
|
! Testing TEAMS-THREAD-LIMIT
|
|
if (env_is_set_dev ("OMP_TEAMS_THREAD_LIMIT_DEV_", i, 5 + i)) then
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
if (omp_get_teams_thread_limit () /= 5 + i) err = .true.
|
|
!$omp end target
|
|
if (err) stop 11
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
!$omp teams
|
|
!$omp parallel
|
|
if (omp_get_thread_limit () > 5 + i .or. omp_get_thread_num () >= 5 + i) &
|
|
err = .true.
|
|
!$omp end parallel
|
|
!$omp end teams
|
|
!$omp end target
|
|
if (err) stop 12
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
call omp_set_teams_thread_limit (4 + i)
|
|
if (omp_get_teams_thread_limit () /= 4 + i) err = .true.
|
|
!$omp end target
|
|
if (err) stop 13
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
if (omp_get_teams_thread_limit () /= 4 + i) err = .true.
|
|
!$omp end target
|
|
if (err) stop 14
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
!$omp teams
|
|
!$omp parallel
|
|
if (omp_get_thread_limit () > 4 + i .or. omp_get_thread_num () >= 4 + i) &
|
|
err = .true.
|
|
!$omp end parallel
|
|
!$omp end teams
|
|
!$omp end target
|
|
if (err) stop 15
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
!$omp teams thread_limit(5 + i)
|
|
!$omp parallel
|
|
if (omp_get_thread_limit () > 5 + i .or. omp_get_thread_num () >= 5 + i) &
|
|
err = .true.
|
|
!$omp end parallel
|
|
!$omp end teams
|
|
!$omp end target
|
|
if (err) stop 16
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
!$omp teams thread_limit(3 + i)
|
|
!$omp parallel
|
|
if (omp_get_thread_limit () > 3 + i .or. omp_get_thread_num () >= 3 + i) &
|
|
err = .true.
|
|
!$omp end parallel
|
|
!$omp end teams
|
|
!$omp end target
|
|
if (err) stop 17
|
|
|
|
large_threads_limit = 67000
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
call omp_set_teams_thread_limit (large_threads_limit + i)
|
|
if (omp_get_teams_thread_limit () /= large_threads_limit + i) err = .true.
|
|
!$omp end target
|
|
if (err) stop 18
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
if (omp_get_teams_thread_limit () /= large_threads_limit + i) err = .true.
|
|
!$omp end target
|
|
if (err) stop 19
|
|
|
|
err = .false.
|
|
!$omp target device(i) map(tofrom: err)
|
|
!$omp teams
|
|
!$omp parallel
|
|
if (omp_get_thread_limit () > large_threads_limit + i &
|
|
.or. omp_get_thread_num () >= large_threads_limit + i) err = .true.
|
|
!$omp end parallel
|
|
!$omp end teams
|
|
!$omp end target
|
|
if (err) stop 20
|
|
end if
|
|
|
|
end do
|
|
|
|
contains
|
|
logical function env_is_set (name, val)
|
|
character(len=*) :: name, val
|
|
character(len=40) :: val2
|
|
integer :: stat
|
|
call get_environment_variable (name, val2, status=stat)
|
|
if (stat == 0) then
|
|
if (val == val2) then
|
|
env_is_set = .true.
|
|
return
|
|
end if
|
|
else if (stat /= 1) then
|
|
error stop 30
|
|
endif
|
|
env_is_set = .false.
|
|
end
|
|
logical function env_is_set_dev (name, dev_num, val)
|
|
character(len=*) :: name
|
|
integer :: dev_num, val
|
|
character(len=64) :: dev_num_str, env_var, val_str
|
|
dev_num_str = ADJUSTL(dev_num_str)
|
|
env_var = name // dev_num_str
|
|
val_str = ADJUSTL(val_str)
|
|
env_is_set_dev = env_is_set (TRIM(env_var), TRIM(val_str))
|
|
end
|
|
end
|