gcc/libgomp/icv-device.c
Marcel Vollweiler 81476bc4f4 OpenMP: omp_get_max_teams, omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices
This patch adds support for omp_get_max_teams, omp_set_num_teams, and
omp_{gs}et_teams_thread_limit on offload devices. That includes the usage of
device-specific ICV values (specified as environment variables or changed on a
device). In order to reuse device-specific ICV values, a copy back mechanism is
implemented that copies ICV values back from device to the host.

Additionally, a limitation of the number of teams on gcn offload devices is
implemented.  The number of teams is limited by twice the number of compute
units (one team is executed on one compute unit).  This avoids queueing
unnessecary many teams and a corresponding allocation of large amounts of
memory.  Without that limitation the memory allocation for a large number of
user-specified teams can result in an "memory access fault".
A limitation of the number of teams is already also implemented for nvptx
devices (see nvptx_adjust_launch_bounds in libgomp/plugin/plugin-nvptx.c).

gcc/ChangeLog:

	* gimplify.cc (optimize_target_teams): Set initial num_teams_upper
	to "-2" instead of "1" for non-existing num_teams clause in order to
	disambiguate from the case of an existing num_teams clause with value 1.

libgomp/ChangeLog:

	* config/gcn/icv-device.c (omp_get_teams_thread_limit): Added to
	allow processing of device-specific values.
	(omp_set_teams_thread_limit): Likewise.
	(ialias): Likewise.
	* config/nvptx/icv-device.c (omp_get_teams_thread_limit): Likewise.
	(omp_set_teams_thread_limit): Likewise.
	(ialias): Likewise.
	* icv-device.c (omp_get_teams_thread_limit): Likewise.
	(ialias): Likewise.
	(omp_set_teams_thread_limit): Likewise.
	* icv.c (omp_set_teams_thread_limit): Removed.
	(omp_get_teams_thread_limit): Likewise.
	(ialias): Likewise.
	* libgomp.texi: Updated documentation for nvptx and gcn corresponding
	to the limitation of the number of teams.
	* plugin/plugin-gcn.c (limit_teams): New helper function that limits
	the number of teams by twice the number of compute units.
	(parse_target_attributes): Limit the number of teams on gcn offload
	devices.
	* target.c (get_gomp_offload_icvs): Added teams_thread_limit_var
	handling.
	(gomp_load_image_to_device): Added a size check for the ICVs struct
	variable.
	(gomp_copy_back_icvs): New function that is used in GOMP_target_ext to
	copy back the ICV values from device to host.
	(GOMP_target_ext): Update the number of teams and threads in the kernel
	args also considering device-specific values.
	* testsuite/libgomp.c-c++-common/icv-4.c: Fixed an error in the reading
	of OMP_TEAMS_THREAD_LIMIT from the environment.
	* testsuite/libgomp.c-c++-common/icv-5.c: Extended.
	* testsuite/libgomp.c-c++-common/icv-6.c: Extended.
	* testsuite/libgomp.c-c++-common/icv-7.c: Extended.
	* testsuite/libgomp.c-c++-common/icv-9.c: New test.
	* testsuite/libgomp.fortran/icv-5.f90: New test.
	* testsuite/libgomp.fortran/icv-6.f90: New test.

gcc/testsuite/ChangeLog:

	* c-c++-common/gomp/target-teams-1.c: Adapt expected values for
	num_teams from "1" to "-2" in cases without num_teams clause.
	* g++.dg/gomp/target-teams-1.C: Likewise.
	* gfortran.dg/gomp/defaultmap-4.f90: Likewise.
	* gfortran.dg/gomp/defaultmap-5.f90: Likewise.
	* gfortran.dg/gomp/defaultmap-6.f90: Likewise.
2022-12-06 06:03:50 -08:00

117 lines
2.6 KiB
C

/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file defines OpenMP API entry points that accelerator targets are
expected to replace. */
#include "libgomp.h"
void
omp_set_default_device (int device_num)
{
struct gomp_task_icv *icv = gomp_icv (true);
icv->default_device_var = device_num;
}
ialias (omp_set_default_device)
int
omp_get_default_device (void)
{
struct gomp_task_icv *icv = gomp_icv (false);
return icv->default_device_var;
}
ialias (omp_get_default_device)
int
omp_get_initial_device (void)
{
return gomp_get_num_devices ();
}
ialias (omp_get_initial_device)
int
omp_get_num_devices (void)
{
return gomp_get_num_devices ();
}
ialias (omp_get_num_devices)
int
omp_is_initial_device (void)
{
/* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX. */
return 1;
}
ialias (omp_is_initial_device)
int
omp_get_device_num (void)
{
/* By specification, this is equivalent to omp_get_initial_device
on the host. */
return ialias_call (omp_get_initial_device) ();
}
ialias (omp_get_device_num)
int
omp_get_max_teams (void)
{
return gomp_nteams_var;
}
ialias (omp_get_max_teams)
void
omp_set_num_teams (int num_teams)
{
if (num_teams >= 0)
gomp_nteams_var = num_teams;
}
ialias (omp_set_num_teams)
int
omp_get_teams_thread_limit (void)
{
return gomp_teams_thread_limit_var;
}
ialias (omp_get_teams_thread_limit)
void
omp_set_teams_thread_limit (int thread_limit)
{
if (thread_limit >= 0)
gomp_teams_thread_limit_var = thread_limit;
}
ialias (omp_set_teams_thread_limit)