[PR88407] [OpenACC] Correctly handle unseen async-arguments

... which turn the operation into a no-op.

	libgomp/
	PR libgomp/88407
	* plugin/plugin-nvptx.c (nvptx_async_test, nvptx_wait)
	(nvptx_wait_async): Unseen async-argument is a no-op.
	* testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Update.
	* testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise.
	* testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-71.c: Merge into...
	* testsuite/libgomp.oacc-c-c++-common/lib-69.c: ... this.  Update.
	* testsuite/libgomp.oacc-c-c++-common/lib-77.c: Merge into...
	* testsuite/libgomp.oacc-c-c++-common/lib-74.c: ... this.  Update

From-SVN: r267150
This commit is contained in:
Thomas Schwinge 2018-12-14 21:42:40 +01:00 committed by Thomas Schwinge
parent 7de562eec2
commit 1404af62dc
11 changed files with 93 additions and 267 deletions

View File

@ -1,5 +1,18 @@
2018-12-14 Thomas Schwinge <thomas@codesourcery.com>
PR libgomp/88407
* plugin/plugin-nvptx.c (nvptx_async_test, nvptx_wait)
(nvptx_wait_async): Unseen async-argument is a no-op.
* testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Update.
* testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise.
* testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-71.c: Merge into...
* testsuite/libgomp.oacc-c-c++-common/lib-69.c: ... this. Update.
* testsuite/libgomp.oacc-c-c++-common/lib-77.c: Merge into...
* testsuite/libgomp.oacc-c-c++-common/lib-74.c: ... this. Update
* testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Revise.
* testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise.

View File

@ -1539,9 +1539,8 @@ nvptx_async_test (int async)
struct ptx_stream *s;
s = select_stream_for_async (async, pthread_self (), false, NULL);
if (!s)
GOMP_PLUGIN_fatal ("unknown async %d", async);
return 1;
r = CUDA_CALL_NOCHECK (cuStreamQuery, s->stream);
if (r == CUDA_SUCCESS)
@ -1596,7 +1595,7 @@ nvptx_wait (int async)
s = select_stream_for_async (async, pthread_self (), false, NULL);
if (!s)
GOMP_PLUGIN_fatal ("unknown async %d", async);
return;
CUDA_CALL_ASSERT (cuStreamSynchronize, s->stream);
@ -1610,14 +1609,14 @@ nvptx_wait_async (int async1, int async2)
struct ptx_stream *s1, *s2;
pthread_t self = pthread_self ();
s1 = select_stream_for_async (async1, self, false, NULL);
if (!s1)
return;
/* The stream that is waiting (rather than being waited for) doesn't
necessarily have to exist already. */
s2 = select_stream_for_async (async2, self, true, NULL);
s1 = select_stream_for_async (async1, self, false, NULL);
if (!s1)
GOMP_PLUGIN_fatal ("invalid async 1\n");
if (s1 == s2)
GOMP_PLUGIN_fatal ("identical parameters");

View File

@ -41,6 +41,36 @@ int main(void)
assert (queues[i].cuda_stream == NULL);
}
/* No-ops still don't initialize them. */
{
size_t i = 0;
/* Find the first non-special async-argument. */
while (queues[i].async < 0)
++i;
assert (i < queues_n);
#pragma acc wait(queues[i].async) // no-op
++i;
assert (i < queues_n);
#pragma acc parallel wait(queues[i].async) // no-op
;
++i;
assert (i < queues_n);
acc_wait(queues[i].async); // no-op
i += 2;
assert (i < queues_n);
acc_wait_async(queues[i - 1].async, queues[i].async); // no-op, and async queue "i" does not get set up
for (size_t i = 0; i < queues_n; ++i)
{
queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
assert (queues[i].cuda_stream == NULL);
}
}
for (size_t i = 0; i < queues_n; ++i)
{
/* Use the queue to initialize it. */

View File

@ -69,6 +69,8 @@ main (int argc, char **argv)
acc_memcpy_from_device_async (b, d_b, nbytes, 1);
acc_wait (1);
/* Test unseen async-argument. */
acc_wait (10);
for (i = 0; i < N; i++)
{

View File

@ -65,6 +65,8 @@ main (int argc, char **argv)
#pragma acc update self (b[0:N]) async (1)
#pragma acc wait (1)
/* Test unseen async-argument. */
#pragma acc wait (10)
for (i = 0; i < N; i++)
{

View File

@ -103,6 +103,13 @@ main (int argc, char **argv)
abort ();
}
/* Test unseen async-argument. */
if (acc_async_test (1) != 1)
{
fprintf (stderr, "acc_async_test failed on unseen async-argument\n");
abort ();
}
sleep (1);
if (acc_async_test (0) != 1)

View File

@ -1,122 +0,0 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda" } */
#include <stdio.h>
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
int
main (int argc, char **argv)
{
CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
unsigned long *a, *d_a, dticks;
int nbytes;
float dtime;
void *kargs[2];
int clkrate;
int devnum, nprocs;
acc_init (acc_device_nvidia);
devnum = acc_get_device_num (acc_device_nvidia);
r = cuDeviceGet (&dev, devnum);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuDeviceGet failed: %d\n", r);
abort ();
}
r =
cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
dev);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
abort ();
}
r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
abort ();
}
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuModuleLoad failed: %d\n", r);
abort ();
}
r = cuModuleGetFunction (&delay, module, "delay");
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
abort ();
}
nbytes = nprocs * sizeof (unsigned long);
dtime = 200.0;
dticks = (unsigned long) (dtime * clkrate);
a = (unsigned long *) malloc (nbytes);
d_a = (unsigned long *) acc_malloc (nbytes);
acc_map_data (a, d_a, nbytes);
kargs[0] = (void *) &d_a;
kargs[1] = (void *) &dticks;
r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuStreamCreate failed: %d\n", r);
abort ();
}
acc_set_cuda_stream (0, stream);
r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
fprintf (stderr, "CheCKpOInT\n");
if (acc_async_test (1) != 0)
{
fprintf (stderr, "asynchronous operation not running\n");
abort ();
}
sleep ((int) (dtime / 1000.0f) + 1);
if (acc_async_test (1) != 1)
{
fprintf (stderr, "found asynchronous operation still running\n");
abort ();
}
acc_unmap_data (a);
free (a);
acc_free (d_a);
acc_shutdown (acc_device_nvidia);
return 0;
}
/* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */
/* { dg-output "unknown async \[0-9\]+" } */
/* { dg-shouldfail "" } */

View File

@ -103,6 +103,8 @@ main (int argc, char **argv)
}
acc_wait (0);
/* Test unseen async-argument. */
acc_wait (1);
atime = stop_timer (0);
@ -115,6 +117,8 @@ main (int argc, char **argv)
start_timer (0);
acc_wait (0);
/* Test unseen async-argument. */
acc_wait (1);
atime = stop_timer (0);

View File

@ -1,138 +0,0 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda" } */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
#include "timer.h"
int
main (int argc, char **argv)
{
CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
unsigned long *a, *d_a, dticks;
int nbytes;
float atime, dtime;
void *kargs[2];
int clkrate;
int devnum, nprocs;
acc_init (acc_device_nvidia);
devnum = acc_get_device_num (acc_device_nvidia);
r = cuDeviceGet (&dev, devnum);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuDeviceGet failed: %d\n", r);
abort ();
}
r =
cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
dev);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
abort ();
}
r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
abort ();
}
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuModuleLoad failed: %d\n", r);
abort ();
}
r = cuModuleGetFunction (&delay, module, "delay");
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
abort ();
}
nbytes = nprocs * sizeof (unsigned long);
dtime = 200.0;
dticks = (unsigned long) (dtime * clkrate);
a = (unsigned long *) malloc (nbytes);
d_a = (unsigned long *) acc_malloc (nbytes);
acc_map_data (a, d_a, nbytes);
kargs[0] = (void *) &d_a;
kargs[1] = (void *) &dticks;
r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuStreamCreate failed: %d\n", r);
abort ();
}
acc_set_cuda_stream (0, stream);
init_timers (1);
start_timer (0);
r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
fprintf (stderr, "CheCKpOInT\n");
acc_wait (1);
atime = stop_timer (0);
if (atime < dtime)
{
fprintf (stderr, "actual time < delay time\n");
abort ();
}
start_timer (0);
acc_wait (1);
atime = stop_timer (0);
if (0.010 < atime)
{
fprintf (stderr, "actual time < delay time\n");
abort ();
}
acc_unmap_data (a);
fini_timers ();
free (a);
acc_free (d_a);
acc_shutdown (acc_device_nvidia);
return 0;
}
/* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */
/* { dg-output "unknown async \[0-9\]+" } */
/* { dg-shouldfail "" } */

View File

@ -122,6 +122,13 @@ main (int argc, char **argv)
}
}
if (acc_async_test (0) != 0)
abort ();
/* Test unseen async-argument. */
if (acc_async_test (1) != 1)
abort ();
acc_wait_async (0, 1);
if (acc_async_test (0) != 0)
@ -130,6 +137,23 @@ main (int argc, char **argv)
if (acc_async_test (1) != 0)
abort ();
/* Test unseen async-argument. */
{
if (acc_async_test (2) != 1)
abort ();
acc_wait_async (2, 1);
if (acc_async_test (0) != 0)
abort ();
if (acc_async_test (1) != 0)
abort ();
if (acc_async_test (2) != 1)
abort ();
}
acc_wait (1);
atime = stop_timer (0);

View File

@ -17,9 +17,14 @@ program main
call acc_wait_async (0, 1)
! Test unseen async-argument.
if (acc_async_test (2) .neqv. .TRUE.) call abort
call acc_wait_async (2, 1)
call acc_wait (1)
if (acc_async_test (0) .neqv. .TRUE.) call abort
if (acc_async_test (1) .neqv. .TRUE.) call abort
if (acc_async_test (2) .neqv. .TRUE.) call abort
end program