mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-02 01:05:24 +08:00
[PR88407] [OpenACC] Correctly handle unseen async-arguments
... which turn the operation into a no-op. libgomp/ PR libgomp/88407 * plugin/plugin-nvptx.c (nvptx_async_test, nvptx_wait) (nvptx_wait_async): Unseen async-argument is a no-op. * testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Update. * testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. * testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-71.c: Merge into... * testsuite/libgomp.oacc-c-c++-common/lib-69.c: ... this. Update. * testsuite/libgomp.oacc-c-c++-common/lib-77.c: Merge into... * testsuite/libgomp.oacc-c-c++-common/lib-74.c: ... this. Update From-SVN: r267150
This commit is contained in:
parent
7de562eec2
commit
1404af62dc
@ -1,5 +1,18 @@
|
||||
2018-12-14 Thomas Schwinge <thomas@codesourcery.com>
|
||||
|
||||
PR libgomp/88407
|
||||
* plugin/plugin-nvptx.c (nvptx_async_test, nvptx_wait)
|
||||
(nvptx_wait_async): Unseen async-argument is a no-op.
|
||||
* testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Update.
|
||||
* testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/lib-71.c: Merge into...
|
||||
* testsuite/libgomp.oacc-c-c++-common/lib-69.c: ... this. Update.
|
||||
* testsuite/libgomp.oacc-c-c++-common/lib-77.c: Merge into...
|
||||
* testsuite/libgomp.oacc-c-c++-common/lib-74.c: ... this. Update
|
||||
|
||||
* testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Revise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise.
|
||||
|
||||
|
@ -1539,9 +1539,8 @@ nvptx_async_test (int async)
|
||||
struct ptx_stream *s;
|
||||
|
||||
s = select_stream_for_async (async, pthread_self (), false, NULL);
|
||||
|
||||
if (!s)
|
||||
GOMP_PLUGIN_fatal ("unknown async %d", async);
|
||||
return 1;
|
||||
|
||||
r = CUDA_CALL_NOCHECK (cuStreamQuery, s->stream);
|
||||
if (r == CUDA_SUCCESS)
|
||||
@ -1596,7 +1595,7 @@ nvptx_wait (int async)
|
||||
|
||||
s = select_stream_for_async (async, pthread_self (), false, NULL);
|
||||
if (!s)
|
||||
GOMP_PLUGIN_fatal ("unknown async %d", async);
|
||||
return;
|
||||
|
||||
CUDA_CALL_ASSERT (cuStreamSynchronize, s->stream);
|
||||
|
||||
@ -1610,14 +1609,14 @@ nvptx_wait_async (int async1, int async2)
|
||||
struct ptx_stream *s1, *s2;
|
||||
pthread_t self = pthread_self ();
|
||||
|
||||
s1 = select_stream_for_async (async1, self, false, NULL);
|
||||
if (!s1)
|
||||
return;
|
||||
|
||||
/* The stream that is waiting (rather than being waited for) doesn't
|
||||
necessarily have to exist already. */
|
||||
s2 = select_stream_for_async (async2, self, true, NULL);
|
||||
|
||||
s1 = select_stream_for_async (async1, self, false, NULL);
|
||||
if (!s1)
|
||||
GOMP_PLUGIN_fatal ("invalid async 1\n");
|
||||
|
||||
if (s1 == s2)
|
||||
GOMP_PLUGIN_fatal ("identical parameters");
|
||||
|
||||
|
@ -41,6 +41,36 @@ int main(void)
|
||||
assert (queues[i].cuda_stream == NULL);
|
||||
}
|
||||
|
||||
/* No-ops still don't initialize them. */
|
||||
{
|
||||
size_t i = 0;
|
||||
/* Find the first non-special async-argument. */
|
||||
while (queues[i].async < 0)
|
||||
++i;
|
||||
assert (i < queues_n);
|
||||
|
||||
#pragma acc wait(queues[i].async) // no-op
|
||||
|
||||
++i;
|
||||
assert (i < queues_n);
|
||||
#pragma acc parallel wait(queues[i].async) // no-op
|
||||
;
|
||||
|
||||
++i;
|
||||
assert (i < queues_n);
|
||||
acc_wait(queues[i].async); // no-op
|
||||
|
||||
i += 2;
|
||||
assert (i < queues_n);
|
||||
acc_wait_async(queues[i - 1].async, queues[i].async); // no-op, and async queue "i" does not get set up
|
||||
|
||||
for (size_t i = 0; i < queues_n; ++i)
|
||||
{
|
||||
queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
|
||||
assert (queues[i].cuda_stream == NULL);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < queues_n; ++i)
|
||||
{
|
||||
/* Use the queue to initialize it. */
|
||||
|
@ -69,6 +69,8 @@ main (int argc, char **argv)
|
||||
acc_memcpy_from_device_async (b, d_b, nbytes, 1);
|
||||
|
||||
acc_wait (1);
|
||||
/* Test unseen async-argument. */
|
||||
acc_wait (10);
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
|
@ -65,6 +65,8 @@ main (int argc, char **argv)
|
||||
#pragma acc update self (b[0:N]) async (1)
|
||||
|
||||
#pragma acc wait (1)
|
||||
/* Test unseen async-argument. */
|
||||
#pragma acc wait (10)
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
|
@ -103,6 +103,13 @@ main (int argc, char **argv)
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Test unseen async-argument. */
|
||||
if (acc_async_test (1) != 1)
|
||||
{
|
||||
fprintf (stderr, "acc_async_test failed on unseen async-argument\n");
|
||||
abort ();
|
||||
}
|
||||
|
||||
sleep (1);
|
||||
|
||||
if (acc_async_test (0) != 1)
|
||||
|
@ -1,122 +0,0 @@
|
||||
/* { dg-do run { target openacc_nvidia_accel_selected } } */
|
||||
/* { dg-additional-options "-lcuda" } */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <openacc.h>
|
||||
#include <cuda.h>
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
CUdevice dev;
|
||||
CUfunction delay;
|
||||
CUmodule module;
|
||||
CUresult r;
|
||||
CUstream stream;
|
||||
unsigned long *a, *d_a, dticks;
|
||||
int nbytes;
|
||||
float dtime;
|
||||
void *kargs[2];
|
||||
int clkrate;
|
||||
int devnum, nprocs;
|
||||
|
||||
acc_init (acc_device_nvidia);
|
||||
|
||||
devnum = acc_get_device_num (acc_device_nvidia);
|
||||
|
||||
r = cuDeviceGet (&dev, devnum);
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuDeviceGet failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
r =
|
||||
cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
|
||||
dev);
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
r = cuModuleLoad (&module, "subr.ptx");
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuModuleLoad failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
r = cuModuleGetFunction (&delay, module, "delay");
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
nbytes = nprocs * sizeof (unsigned long);
|
||||
|
||||
dtime = 200.0;
|
||||
|
||||
dticks = (unsigned long) (dtime * clkrate);
|
||||
|
||||
a = (unsigned long *) malloc (nbytes);
|
||||
d_a = (unsigned long *) acc_malloc (nbytes);
|
||||
|
||||
acc_map_data (a, d_a, nbytes);
|
||||
|
||||
kargs[0] = (void *) &d_a;
|
||||
kargs[1] = (void *) &dticks;
|
||||
|
||||
r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuStreamCreate failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
acc_set_cuda_stream (0, stream);
|
||||
|
||||
r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
fprintf (stderr, "CheCKpOInT\n");
|
||||
if (acc_async_test (1) != 0)
|
||||
{
|
||||
fprintf (stderr, "asynchronous operation not running\n");
|
||||
abort ();
|
||||
}
|
||||
|
||||
sleep ((int) (dtime / 1000.0f) + 1);
|
||||
|
||||
if (acc_async_test (1) != 1)
|
||||
{
|
||||
fprintf (stderr, "found asynchronous operation still running\n");
|
||||
abort ();
|
||||
}
|
||||
|
||||
acc_unmap_data (a);
|
||||
|
||||
free (a);
|
||||
acc_free (d_a);
|
||||
|
||||
acc_shutdown (acc_device_nvidia);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */
|
||||
/* { dg-output "unknown async \[0-9\]+" } */
|
||||
/* { dg-shouldfail "" } */
|
@ -103,6 +103,8 @@ main (int argc, char **argv)
|
||||
}
|
||||
|
||||
acc_wait (0);
|
||||
/* Test unseen async-argument. */
|
||||
acc_wait (1);
|
||||
|
||||
atime = stop_timer (0);
|
||||
|
||||
@ -115,6 +117,8 @@ main (int argc, char **argv)
|
||||
start_timer (0);
|
||||
|
||||
acc_wait (0);
|
||||
/* Test unseen async-argument. */
|
||||
acc_wait (1);
|
||||
|
||||
atime = stop_timer (0);
|
||||
|
||||
|
@ -1,138 +0,0 @@
|
||||
/* { dg-do run { target openacc_nvidia_accel_selected } } */
|
||||
/* { dg-additional-options "-lcuda" } */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <openacc.h>
|
||||
#include <cuda.h>
|
||||
#include "timer.h"
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
CUdevice dev;
|
||||
CUfunction delay;
|
||||
CUmodule module;
|
||||
CUresult r;
|
||||
CUstream stream;
|
||||
unsigned long *a, *d_a, dticks;
|
||||
int nbytes;
|
||||
float atime, dtime;
|
||||
void *kargs[2];
|
||||
int clkrate;
|
||||
int devnum, nprocs;
|
||||
|
||||
acc_init (acc_device_nvidia);
|
||||
|
||||
devnum = acc_get_device_num (acc_device_nvidia);
|
||||
|
||||
r = cuDeviceGet (&dev, devnum);
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuDeviceGet failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
r =
|
||||
cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
|
||||
dev);
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
r = cuModuleLoad (&module, "subr.ptx");
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuModuleLoad failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
r = cuModuleGetFunction (&delay, module, "delay");
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
nbytes = nprocs * sizeof (unsigned long);
|
||||
|
||||
dtime = 200.0;
|
||||
|
||||
dticks = (unsigned long) (dtime * clkrate);
|
||||
|
||||
a = (unsigned long *) malloc (nbytes);
|
||||
d_a = (unsigned long *) acc_malloc (nbytes);
|
||||
|
||||
acc_map_data (a, d_a, nbytes);
|
||||
|
||||
kargs[0] = (void *) &d_a;
|
||||
kargs[1] = (void *) &dticks;
|
||||
|
||||
r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuStreamCreate failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
acc_set_cuda_stream (0, stream);
|
||||
|
||||
init_timers (1);
|
||||
|
||||
start_timer (0);
|
||||
|
||||
r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
fprintf (stderr, "CheCKpOInT\n");
|
||||
acc_wait (1);
|
||||
|
||||
atime = stop_timer (0);
|
||||
|
||||
if (atime < dtime)
|
||||
{
|
||||
fprintf (stderr, "actual time < delay time\n");
|
||||
abort ();
|
||||
}
|
||||
|
||||
start_timer (0);
|
||||
|
||||
acc_wait (1);
|
||||
|
||||
atime = stop_timer (0);
|
||||
|
||||
if (0.010 < atime)
|
||||
{
|
||||
fprintf (stderr, "actual time < delay time\n");
|
||||
abort ();
|
||||
}
|
||||
|
||||
acc_unmap_data (a);
|
||||
|
||||
fini_timers ();
|
||||
|
||||
free (a);
|
||||
acc_free (d_a);
|
||||
|
||||
acc_shutdown (acc_device_nvidia);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */
|
||||
/* { dg-output "unknown async \[0-9\]+" } */
|
||||
/* { dg-shouldfail "" } */
|
@ -122,6 +122,13 @@ main (int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
if (acc_async_test (0) != 0)
|
||||
abort ();
|
||||
|
||||
/* Test unseen async-argument. */
|
||||
if (acc_async_test (1) != 1)
|
||||
abort ();
|
||||
|
||||
acc_wait_async (0, 1);
|
||||
|
||||
if (acc_async_test (0) != 0)
|
||||
@ -130,6 +137,23 @@ main (int argc, char **argv)
|
||||
if (acc_async_test (1) != 0)
|
||||
abort ();
|
||||
|
||||
/* Test unseen async-argument. */
|
||||
{
|
||||
if (acc_async_test (2) != 1)
|
||||
abort ();
|
||||
|
||||
acc_wait_async (2, 1);
|
||||
|
||||
if (acc_async_test (0) != 0)
|
||||
abort ();
|
||||
|
||||
if (acc_async_test (1) != 0)
|
||||
abort ();
|
||||
|
||||
if (acc_async_test (2) != 1)
|
||||
abort ();
|
||||
}
|
||||
|
||||
acc_wait (1);
|
||||
|
||||
atime = stop_timer (0);
|
||||
|
@ -17,9 +17,14 @@ program main
|
||||
|
||||
call acc_wait_async (0, 1)
|
||||
|
||||
! Test unseen async-argument.
|
||||
if (acc_async_test (2) .neqv. .TRUE.) call abort
|
||||
call acc_wait_async (2, 1)
|
||||
|
||||
call acc_wait (1)
|
||||
|
||||
if (acc_async_test (0) .neqv. .TRUE.) call abort
|
||||
if (acc_async_test (1) .neqv. .TRUE.) call abort
|
||||
if (acc_async_test (2) .neqv. .TRUE.) call abort
|
||||
|
||||
end program
|
||||
|
Loading…
Reference in New Issue
Block a user