From fa4fcb111ad4ef015c1caf7c4b5a60d35b3aa997 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 11 Nov 2021 13:57:31 +0100 Subject: [PATCH] libgomp: Use TLS storage for omp_get_num_teams()/omp_get_team_num() values When thinking about GOMP_teams3, I've realized that using global variables for the values returned by omp_get_num_teams()/omp_get_team_num() calls is incorrect even with our right now dumb way of implementing host teams. The problems are two, one is if host teams is used from multiple pthread_create created threads - the spec says that host teams can't be nested inside of explicit parallel or other teams constructs, but with pthread_create the standard says obviously nothing about it. Another more important thing is host fallback, right now we don't do anything for omp_get_num_teams() or omp_get_team_num() which was fine before host teams was introduced and the 5.1 requirement that num_teams clause specifies minimum of teams, but with the global vars it means inside of target teams num_teams (2) we happily return omp_get_num_teams() == 4 if the target teams is inside of host teams with num_teams(4). With target fallback being invoked from parallel regions global vars simply can't work right on the host. So, this patch moves them to struct gomp_thread and propagates those for parallel to child threads. For host fallback, the implicit zeroing of *thr results in us returning omp_get_num_teams () == 1 and omp_get_team_num () == 0 which is fine for target teams without num_teams clause, for target teams with num_teams clause something to work on and for target without teams nested in it I've asked on omp-lang what should be done. 2021-11-11 Jakub Jelinek * libgomp.h (struct gomp_thread): Add num_teams and team_num members. * team.c (struct gomp_thread_start_data): Likewise. (gomp_thread_start): Initialize thr->num_teams and thr->team_num. (gomp_team_start): Initialize start_data->num_teams and start_data->team_num. Update nthr->num_teams and nthr->team_num. * teams.c (gomp_num_teams, gomp_team_num): Remove. (GOMP_teams_reg): Set and restore thr->num_teams and thr->team_num instead of gomp_num_teams and gomp_team_num. (omp_get_num_teams): Use thr->num_teams + 1 instead of gomp_num_teams. (omp_get_team_num): Use thr->team_num instead of gomp_team_num. * testsuite/libgomp.c/teams-4.c: New test. --- libgomp/libgomp.h | 8 +++++++ libgomp/team.c | 8 +++++++ libgomp/teams.c | 18 ++++++++-------- libgomp/testsuite/libgomp.c/teams-4.c | 30 +++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 9 deletions(-) create mode 100644 libgomp/testsuite/libgomp.c/teams-4.c diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 023d8a8c1df7..ceef643216c4 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -768,6 +768,14 @@ struct gomp_thread /* User pthread thread pool */ struct gomp_thread_pool *thread_pool; +#ifdef LIBGOMP_USE_PTHREADS + /* omp_get_num_teams () - 1. */ + unsigned int num_teams; + + /* omp_get_team_num (). */ + unsigned int team_num; +#endif + #if defined(LIBGOMP_USE_PTHREADS) \ && (!defined(HAVE_TLS) \ || !defined(__GLIBC__) \ diff --git a/libgomp/team.c b/libgomp/team.c index 11a2521057ff..3bcc8174d1d0 100644 --- a/libgomp/team.c +++ b/libgomp/team.c @@ -56,6 +56,8 @@ struct gomp_thread_start_data struct gomp_task *task; struct gomp_thread_pool *thread_pool; unsigned int place; + unsigned int num_teams; + unsigned int team_num; bool nested; pthread_t handle; }; @@ -88,6 +90,8 @@ gomp_thread_start (void *xdata) thr->ts = data->ts; thr->task = data->task; thr->place = data->place; + thr->num_teams = data->num_teams; + thr->team_num = data->team_num; #ifdef GOMP_NEEDS_THREAD_HANDLE thr->handle = data->handle; #endif @@ -645,6 +649,8 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, nthr->ts.single_count = 0; #endif nthr->ts.static_trip = 0; + nthr->num_teams = thr->num_teams; + nthr->team_num = thr->team_num; nthr->task = &team->implicit_task[i]; nthr->place = place; gomp_init_task (nthr->task, task, icv); @@ -833,6 +839,8 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, start_data->ts.single_count = 0; #endif start_data->ts.static_trip = 0; + start_data->num_teams = thr->num_teams; + start_data->team_num = thr->team_num; start_data->task = &team->implicit_task[i]; gomp_init_task (start_data->task, task, icv); team->implicit_task[i].icv.nthreads_var = nthreads_var; diff --git a/libgomp/teams.c b/libgomp/teams.c index 9409f8ee6aa7..a93fb7cc6e18 100644 --- a/libgomp/teams.c +++ b/libgomp/teams.c @@ -28,14 +28,12 @@ #include "libgomp.h" #include -static unsigned gomp_num_teams = 1, gomp_team_num = 0; - void GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams, unsigned int thread_limit, unsigned int flags) { + struct gomp_thread *thr = gomp_thread (); (void) flags; - (void) num_teams; unsigned old_thread_limit_var = 0; if (thread_limit == 0) thread_limit = gomp_teams_thread_limit_var; @@ -48,11 +46,11 @@ GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams, } if (num_teams == 0) num_teams = gomp_nteams_var ? gomp_nteams_var : 3; - gomp_num_teams = num_teams; - for (gomp_team_num = 0; gomp_team_num < num_teams; gomp_team_num++) + thr->num_teams = num_teams - 1; + for (thr->team_num = 0; thr->team_num < num_teams; thr->team_num++) fn (data); - gomp_num_teams = 1; - gomp_team_num = 0; + thr->num_teams = 0; + thr->team_num = 0; if (thread_limit) { struct gomp_task_icv *icv = gomp_icv (true); @@ -63,13 +61,15 @@ GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams, int omp_get_num_teams (void) { - return gomp_num_teams; + struct gomp_thread *thr = gomp_thread (); + return thr->num_teams + 1; } int omp_get_team_num (void) { - return gomp_team_num; + struct gomp_thread *thr = gomp_thread (); + return thr->team_num; } ialias (omp_get_num_teams) diff --git a/libgomp/testsuite/libgomp.c/teams-4.c b/libgomp/testsuite/libgomp.c/teams-4.c new file mode 100644 index 000000000000..c1f5356c622b --- /dev/null +++ b/libgomp/testsuite/libgomp.c/teams-4.c @@ -0,0 +1,30 @@ +#include +#include + +int +main () +{ + if (omp_get_num_teams () != 1 || omp_get_team_num () != 0) + abort (); + #pragma omp parallel num_threads (2) + if (omp_get_num_teams () != 1 || omp_get_team_num () != 0) + abort (); + #pragma omp teams num_teams (4) + { + int team = omp_get_team_num (); + if (omp_get_num_teams () != 4 || (unsigned) team >= 4U) + abort (); + #pragma omp parallel num_threads (3) + if (omp_get_num_teams () != 4 || omp_get_team_num () != team) + abort (); + #pragma omp parallel if (0) + #pragma omp target + #pragma omp teams num_teams (2) + if (omp_get_num_teams () > 2 + || (unsigned) omp_get_team_num () >= 2U) + abort (); + if (omp_get_num_teams () != 4 || (unsigned) team >= 4U) + abort (); + } + return 0; +}