mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-01-25 21:45:17 +08:00
81476bc4f4
This patch adds support for omp_get_max_teams, omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices. That includes the usage of device-specific ICV values (specified as environment variables or changed on a device). In order to reuse device-specific ICV values, a copy-back mechanism is implemented that copies ICV values back from device to the host. Additionally, a limitation of the number of teams on gcn offload devices is implemented. The number of teams is limited by twice the number of compute units (one team is executed on one compute unit). This avoids queueing unnecessarily many teams and a corresponding allocation of large amounts of memory. Without that limitation the memory allocation for a large number of user-specified teams can result in a "memory access fault". A limitation of the number of teams is also already implemented for nvptx devices (see nvptx_adjust_launch_bounds in libgomp/plugin/plugin-nvptx.c). gcc/ChangeLog: * gimplify.cc (optimize_target_teams): Set initial num_teams_upper to "-2" instead of "1" for non-existing num_teams clause in order to disambiguate from the case of an existing num_teams clause with value 1. libgomp/ChangeLog: * config/gcn/icv-device.c (omp_get_teams_thread_limit): Added to allow processing of device-specific values. (omp_set_teams_thread_limit): Likewise. (ialias): Likewise. * config/nvptx/icv-device.c (omp_get_teams_thread_limit): Likewise. (omp_set_teams_thread_limit): Likewise. (ialias): Likewise. * icv-device.c (omp_get_teams_thread_limit): Likewise. (ialias): Likewise. (omp_set_teams_thread_limit): Likewise. * icv.c (omp_set_teams_thread_limit): Removed. (omp_get_teams_thread_limit): Likewise. (ialias): Likewise. * libgomp.texi: Updated documentation for nvptx and gcn corresponding to the limitation of the number of teams. * plugin/plugin-gcn.c (limit_teams): New helper function that limits the number of teams by twice the number of compute units.
(parse_target_attributes): Limit the number of teams on gcn offload devices. * target.c (get_gomp_offload_icvs): Added teams_thread_limit_var handling. (gomp_load_image_to_device): Added a size check for the ICVs struct variable. (gomp_copy_back_icvs): New function that is used in GOMP_target_ext to copy back the ICV values from device to host. (GOMP_target_ext): Update the number of teams and threads in the kernel args also considering device-specific values. * testsuite/libgomp.c-c++-common/icv-4.c: Fixed an error in the reading of OMP_TEAMS_THREAD_LIMIT from the environment. * testsuite/libgomp.c-c++-common/icv-5.c: Extended. * testsuite/libgomp.c-c++-common/icv-6.c: Extended. * testsuite/libgomp.c-c++-common/icv-7.c: Extended. * testsuite/libgomp.c-c++-common/icv-9.c: New test. * testsuite/libgomp.fortran/icv-5.f90: New test. * testsuite/libgomp.fortran/icv-6.f90: New test. gcc/testsuite/ChangeLog: * c-c++-common/gomp/target-teams-1.c: Adapt expected values for num_teams from "1" to "-2" in cases without num_teams clause. * g++.dg/gomp/target-teams-1.C: Likewise. * gfortran.dg/gomp/defaultmap-4.f90: Likewise. * gfortran.dg/gomp/defaultmap-5.f90: Likewise. * gfortran.dg/gomp/defaultmap-6.f90: Likewise.
260 lines
5.9 KiB
C
260 lines
5.9 KiB
C
/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
|
|
Contributed by Richard Henderson <rth@redhat.com>.
|
|
|
|
This file is part of the GNU Offloading and Multi Processing Library
|
|
(libgomp).
|
|
|
|
Libgomp is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3, or (at your option)
|
|
any later version.
|
|
|
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
more details.
|
|
|
|
Under Section 7 of GPL version 3, you are granted additional
|
|
permissions described in the GCC Runtime Library Exception, version
|
|
3.1, as published by the Free Software Foundation.
|
|
|
|
You should have received a copy of the GNU General Public License and
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
/* This file defines the OpenMP API entry points that operate on internal
|
|
control variables. */
|
|
|
|
#include "libgomp.h"
|
|
#include "gomp-constants.h"
|
|
#include <limits.h>
|
|
|
|
/* omp_get_active_level is called from omp_get_nested in this file.
   NOTE(review): ialias_redirect is defined outside this file; presumably it
   routes that call to a library-internal alias -- confirm in libgomp.h.  */
ialias_redirect (omp_get_active_level)
|
|
|
|
void
|
|
omp_set_num_threads (int n)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (true);
|
|
icv->nthreads_var = (n > 0 ? n : 1);
|
|
}
|
|
|
|
void
|
|
omp_set_dynamic (int val)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (true);
|
|
icv->dyn_var = val;
|
|
}
|
|
|
|
int
|
|
omp_get_dynamic (void)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
return icv->dyn_var;
|
|
}
|
|
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
|
void
|
|
omp_set_nested (int val)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (true);
|
|
if (val)
|
|
icv->max_active_levels_var = gomp_supported_active_levels;
|
|
else if (icv->max_active_levels_var > 1)
|
|
icv->max_active_levels_var = 1;
|
|
}
|
|
|
|
int
|
|
omp_get_nested (void)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
return (icv->max_active_levels_var > 1
|
|
&& icv->max_active_levels_var > omp_get_active_level ());
|
|
}
|
|
#pragma GCC diagnostic pop
|
|
|
|
void
|
|
omp_set_schedule (omp_sched_t kind, int chunk_size)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (true);
|
|
switch (kind & ~omp_sched_monotonic)
|
|
{
|
|
case omp_sched_static:
|
|
if (chunk_size < 1)
|
|
chunk_size = 0;
|
|
icv->run_sched_chunk_size = chunk_size;
|
|
break;
|
|
case omp_sched_dynamic:
|
|
case omp_sched_guided:
|
|
if (chunk_size < 1)
|
|
chunk_size = 1;
|
|
icv->run_sched_chunk_size = chunk_size;
|
|
break;
|
|
case omp_sched_auto:
|
|
break;
|
|
default:
|
|
return;
|
|
}
|
|
icv->run_sched_var = kind;
|
|
}
|
|
|
|
void
|
|
omp_get_schedule (omp_sched_t *kind, int *chunk_size)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
*kind = icv->run_sched_var;
|
|
*chunk_size = icv->run_sched_chunk_size;
|
|
}
|
|
|
|
int
|
|
omp_get_max_threads (void)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
return icv->nthreads_var;
|
|
}
|
|
|
|
int
|
|
omp_get_thread_limit (void)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
return icv->thread_limit_var > INT_MAX ? INT_MAX : icv->thread_limit_var;
|
|
}
|
|
|
|
void
|
|
omp_set_max_active_levels (int max_levels)
|
|
{
|
|
if (max_levels >= 0)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (true);
|
|
|
|
if (max_levels <= gomp_supported_active_levels)
|
|
icv->max_active_levels_var = max_levels;
|
|
else
|
|
icv->max_active_levels_var = gomp_supported_active_levels;
|
|
}
|
|
}
|
|
|
|
int
|
|
omp_get_max_active_levels (void)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
return icv->max_active_levels_var;
|
|
}
|
|
|
|
int
|
|
omp_get_supported_active_levels (void)
|
|
{
|
|
return gomp_supported_active_levels;
|
|
}
|
|
|
|
int
|
|
omp_get_cancellation (void)
|
|
{
|
|
return gomp_cancel_var;
|
|
}
|
|
|
|
int
|
|
omp_get_max_task_priority (void)
|
|
{
|
|
return gomp_max_task_priority_var;
|
|
}
|
|
|
|
omp_proc_bind_t
|
|
omp_get_proc_bind (void)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
return icv->bind_var;
|
|
}
|
|
|
|
int
|
|
omp_get_num_places (void)
|
|
{
|
|
return gomp_places_list_len;
|
|
}
|
|
|
|
int
|
|
omp_get_place_num (void)
|
|
{
|
|
if (gomp_places_list == NULL)
|
|
return -1;
|
|
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
if (thr->place == 0)
|
|
gomp_init_affinity ();
|
|
|
|
return (int) thr->place - 1;
|
|
}
|
|
|
|
int
|
|
omp_get_partition_num_places (void)
|
|
{
|
|
if (gomp_places_list == NULL)
|
|
return 0;
|
|
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
if (thr->place == 0)
|
|
gomp_init_affinity ();
|
|
|
|
return thr->ts.place_partition_len;
|
|
}
|
|
|
|
void
|
|
omp_get_partition_place_nums (int *place_nums)
|
|
{
|
|
if (gomp_places_list == NULL)
|
|
return;
|
|
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
if (thr->place == 0)
|
|
gomp_init_affinity ();
|
|
|
|
unsigned int i;
|
|
for (i = 0; i < thr->ts.place_partition_len; i++)
|
|
*place_nums++ = thr->ts.place_partition_off + i;
|
|
}
|
|
|
|
void
|
|
omp_set_default_allocator (omp_allocator_handle_t allocator)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
if (allocator == omp_null_allocator)
|
|
allocator = omp_default_mem_alloc;
|
|
thr->ts.def_allocator = (uintptr_t) allocator;
|
|
}
|
|
|
|
omp_allocator_handle_t
|
|
omp_get_default_allocator (void)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
if (thr->ts.def_allocator == omp_null_allocator)
|
|
return (omp_allocator_handle_t) gomp_def_allocator;
|
|
else
|
|
return (omp_allocator_handle_t) thr->ts.def_allocator;
|
|
}
|
|
|
|
ialias (omp_set_dynamic)
|
|
ialias (omp_get_dynamic)
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
|
ialias (omp_set_nested)
|
|
ialias (omp_get_nested)
|
|
#pragma GCC diagnostic pop
|
|
ialias (omp_set_num_threads)
|
|
ialias (omp_set_schedule)
|
|
ialias (omp_get_schedule)
|
|
ialias (omp_get_max_threads)
|
|
ialias (omp_get_thread_limit)
|
|
ialias (omp_set_max_active_levels)
|
|
ialias (omp_get_max_active_levels)
|
|
ialias (omp_get_supported_active_levels)
|
|
ialias (omp_get_cancellation)
|
|
ialias (omp_get_proc_bind)
|
|
ialias (omp_get_max_task_priority)
|
|
ialias (omp_get_num_places)
|
|
ialias (omp_get_place_num)
|
|
ialias (omp_get_partition_num_places)
|
|
ialias (omp_get_partition_place_nums)
|
|
ialias (omp_set_default_allocator)
|
|
ialias (omp_get_default_allocator)
|