mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-09 17:01:06 +08:00
libgomp: disable barriers in nested teams
Both GCN and NVPTX allow nested parallel regions, but the barrier implementation did not allow the nested teams to run independently of each other (due to hardware limitations). This patch fixes that, under the assumption that each thread will create a new subteam of one thread, by simply not using barriers when there's no other thread to synchronise. libgomp/ChangeLog: * config/gcn/bar.c (gomp_barrier_wait_end): Skip the barrier if the total number of threads is one. (gomp_team_barrier_wake): Likewise. (gomp_team_barrier_wait_end): Likewise. (gomp_team_barrier_wait_cancel_end): Likewise. * config/nvptx/bar.c (gomp_barrier_wait_end): Likewise. (gomp_team_barrier_wake): Likewise. (gomp_team_barrier_wait_end): Likewise. (gomp_team_barrier_wait_cancel_end): Likewise. * testsuite/libgomp.c-c++-common/nested-parallel-unbalanced.c: New test.
This commit is contained in:
parent
95e10b8aa1
commit
6f51395197
libgomp
@ -43,7 +43,8 @@ gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
|
||||
__atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
|
||||
MEMMODEL_RELAXED);
|
||||
}
|
||||
asm ("s_barrier" ::: "memory");
|
||||
if (bar->total > 1)
|
||||
asm ("s_barrier" ::: "memory");
|
||||
}
|
||||
|
||||
void
|
||||
@ -71,7 +72,8 @@ gomp_barrier_wait_last (gomp_barrier_t *bar)
|
||||
void
|
||||
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
|
||||
{
|
||||
asm ("s_barrier" ::: "memory");
|
||||
if (bar->total > 1)
|
||||
asm ("s_barrier" ::: "memory");
|
||||
}
|
||||
|
||||
void
|
||||
@ -97,7 +99,8 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
|
||||
state &= ~BAR_CANCELLED;
|
||||
state += BAR_INCR - BAR_WAS_LAST;
|
||||
__atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
|
||||
asm ("s_barrier" ::: "memory");
|
||||
if (bar->total > 1)
|
||||
asm ("s_barrier" ::: "memory");
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -172,7 +175,8 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
|
||||
{
|
||||
state += BAR_INCR - BAR_WAS_LAST;
|
||||
__atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
|
||||
asm ("s_barrier" ::: "memory");
|
||||
if (bar->total > 1)
|
||||
asm ("s_barrier" ::: "memory");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -195,7 +199,8 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
|
||||
abort();
|
||||
}
|
||||
|
||||
asm ("s_barrier" ::: "memory");
|
||||
if (bar->total > 1)
|
||||
asm ("s_barrier" ::: "memory");
|
||||
gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
|
||||
if (__builtin_expect (gen & BAR_CANCELLED, 0))
|
||||
return true;
|
||||
|
@ -41,7 +41,8 @@ gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
|
||||
__atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
|
||||
MEMMODEL_RELEASE);
|
||||
}
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
if (bar->total > 1)
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
}
|
||||
|
||||
void
|
||||
@ -69,7 +70,8 @@ gomp_barrier_wait_last (gomp_barrier_t *bar)
|
||||
void
|
||||
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
|
||||
{
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
if (bar->total > 1)
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
}
|
||||
|
||||
void
|
||||
@ -95,7 +97,8 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
|
||||
state &= ~BAR_CANCELLED;
|
||||
state += BAR_INCR - BAR_WAS_LAST;
|
||||
__atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
if (bar->total > 1)
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -104,7 +107,8 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
|
||||
state &= ~BAR_CANCELLED;
|
||||
do
|
||||
{
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
if (bar->total > 1)
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
|
||||
if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
|
||||
{
|
||||
@ -158,7 +162,8 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
|
||||
{
|
||||
state += BAR_INCR - BAR_WAS_LAST;
|
||||
__atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
if (bar->total > 1)
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -169,7 +174,8 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
|
||||
generation = state;
|
||||
do
|
||||
{
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
if (bar->total > 1)
|
||||
asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
|
||||
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
|
||||
if (__builtin_expect (gen & BAR_CANCELLED, 0))
|
||||
return true;
|
||||
|
@ -0,0 +1,31 @@
|
||||
/* Ensure that nested parallel regions work even when the number of loop
   iterations is not divisible by the number of threads.

   The outer loop runs 30 iterations across 8 threads (30 % 8 != 0), and
   each outer iteration spawns an inner parallel region, exercising the
   nested-team barrier path on offload targets.  */

#include <stdlib.h>

int main() {
  int A[30][40], B[30][40];
  size_t n = 30;

  /* Fill the full source array so every element copied below is defined.  */
  for (size_t row = 0; row < 30; ++row)
    for (size_t col = 0; col < 40; ++col)
      A[row][col] = 42;

#pragma omp target map(A[0:30][0:40], B[0:30][0:40])
  {
    /* Unbalanced outer team: 30 iterations on 8 threads.  */
#pragma omp parallel for num_threads(8)
    for (size_t row = 0; row < n; ++row)
      {
	/* Nested team inside each outer iteration.  */
#pragma omp parallel for
	for (size_t col = 0; col < n; ++col)
	  B[row][col] = A[row][col];
      }
  }

  /* Verify the copy: only the n x n region was written.  */
  for (size_t row = 0; row < n; ++row)
    for (size_t col = 0; col < n; ++col)
      if (B[row][col] != 42)
	abort ();
}
|
Loading…
x
Reference in New Issue
Block a user