mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-13 14:51:18 +08:00
[PR88370] acc_get_cuda_stream/acc_set_cuda_stream: acc_async_sync, acc_async_noval
Per my reading of the OpenACC specification (and as supported by secondary documentation, such as code examples, or presentations), it's valid to call "acc_get_cuda_stream"/"acc_set_cuda_stream" also with "acc_async_sync", "acc_async_noval" arguments, not just with the nonnegative values as currently implemented. libgomp/ PR libgomp/88370 * libgomp.texi (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Clarify. * oacc-cuda.c (acc_get_cuda_stream, acc_set_cuda_stream): Use "async_valid_p". * plugin/plugin-nvptx.c (nvptx_set_cuda_stream): Refuse "async == acc_async_sync". * testsuite/libgomp.oacc-c-c++-common/acc_set_cuda_stream-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-84.c: Update. * testsuite/libgomp.oacc-c-c++-common/lib-85.c: Likewise. From-SVN: r267147
This commit is contained in:
parent
5d390fd3ae
commit
18c247cc0b
@ -1,3 +1,17 @@
|
||||
2018-12-14 Thomas Schwinge <thomas@codesourcery.com>
|
||||
|
||||
PR libgomp/88370
|
||||
* libgomp.texi (acc_get_current_cuda_context, acc_get_cuda_stream)
|
||||
(acc_set_cuda_stream): Clarify.
|
||||
* oacc-cuda.c (acc_get_cuda_stream, acc_set_cuda_stream): Use
|
||||
"async_valid_p".
|
||||
* plugin/plugin-nvptx.c (nvptx_set_cuda_stream): Refuse "async ==
|
||||
acc_async_sync".
|
||||
* testsuite/libgomp.oacc-c-c++-common/acc_set_cuda_stream-1.c: New file.
|
||||
* testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/lib-84.c: Update.
|
||||
* testsuite/libgomp.oacc-c-c++-common/lib-85.c: Likewise.
|
||||
|
||||
2018-12-14 Tom de Vries <tdevries@suse.de>
|
||||
|
||||
* testsuite/libgomp.c-c++-common/function-not-offloaded-aux.c: New test.
|
||||
|
@ -2768,7 +2768,7 @@ as used by the CUDA Runtime or Driver API's.
|
||||
|
||||
@item @emph{C/C++}:
|
||||
@multitable @columnfractions .20 .80
|
||||
@item @emph{Prototype}: @tab @code{acc_get_current_cuda_context(void);}
|
||||
@item @emph{Prototype}: @tab @code{void *acc_get_current_cuda_context(void);}
|
||||
@end multitable
|
||||
|
||||
@item @emph{Reference}:
|
||||
@ -2782,12 +2782,12 @@ A.2.1.2.
|
||||
@section @code{acc_get_cuda_stream} -- Get CUDA stream handle.
|
||||
@table @asis
|
||||
@item @emph{Description}
|
||||
This function returns the CUDA stream handle. This handle is the same
|
||||
as used by the CUDA Runtime or Driver API's.
|
||||
This function returns the CUDA stream handle for the queue @var{async}.
|
||||
This handle is the same as used by the CUDA Runtime or Driver API's.
|
||||
|
||||
@item @emph{C/C++}:
|
||||
@multitable @columnfractions .20 .80
|
||||
@item @emph{Prototype}: @tab @code{acc_get_cuda_stream(void);}
|
||||
@item @emph{Prototype}: @tab @code{void *acc_get_cuda_stream(int async);}
|
||||
@end multitable
|
||||
|
||||
@item @emph{Reference}:
|
||||
@ -2802,11 +2802,16 @@ A.2.1.3.
|
||||
@table @asis
|
||||
@item @emph{Description}
|
||||
This function associates the stream handle specified by @var{stream} with
|
||||
the asynchronous value specified by @var{async}.
|
||||
the queue @var{async}.
|
||||
|
||||
This cannot be used to change the stream handle associated with
|
||||
@code{acc_async_sync}.
|
||||
|
||||
The return value is not specified.
|
||||
|
||||
@item @emph{C/C++}:
|
||||
@multitable @columnfractions .20 .80
|
||||
@item @emph{Prototype}: @tab @code{acc_set_cuda_stream(int async void *stream);}
|
||||
@item @emph{Prototype}: @tab @code{int acc_set_cuda_stream(int async, void *stream);}
|
||||
@end multitable
|
||||
|
||||
@item @emph{Reference}:
|
||||
|
@ -58,7 +58,7 @@ acc_get_cuda_stream (int async)
|
||||
{
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (!async_valid_stream_id_p (async))
|
||||
if (!async_valid_p (async))
|
||||
return NULL;
|
||||
|
||||
if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func)
|
||||
@ -72,7 +72,7 @@ acc_set_cuda_stream (int async, void *stream)
|
||||
{
|
||||
struct goacc_thread *thr;
|
||||
|
||||
if (!async_valid_stream_id_p (async) || stream == NULL)
|
||||
if (!async_valid_p (async) || stream == NULL)
|
||||
return 0;
|
||||
|
||||
goacc_lazy_initialize ();
|
||||
|
@ -1753,8 +1753,14 @@ nvptx_set_cuda_stream (int async, void *stream)
|
||||
pthread_t self = pthread_self ();
|
||||
struct nvptx_thread *nvthd = nvptx_thread ();
|
||||
|
||||
if (async < 0)
|
||||
GOMP_PLUGIN_fatal ("bad async %d", async);
|
||||
/* Due to the "null_stream" usage for "acc_async_sync", this cannot be used
|
||||
to change the stream handle associated with "acc_async_sync". */
|
||||
if (async == acc_async_sync)
|
||||
{
|
||||
GOMP_PLUGIN_debug (0, "Refusing request to set CUDA stream associated"
|
||||
" with \"acc_async_sync\"\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
|
||||
|
||||
|
@ -0,0 +1,42 @@
|
||||
/* Verify expected nvptx plugin behavior for "acc_set_cuda_stream" for
|
||||
"acc_async_sync". */
|
||||
|
||||
/* { dg-do run { target openacc_nvidia_accel_selected } } */
|
||||
/* { dg-set-target-env-var GOMP_DEBUG "1" } */
|
||||
|
||||
#undef NDEBUG
|
||||
#include <assert.h>
|
||||
#include <openacc.h>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int async = 42;
|
||||
|
||||
/* Initialize. */
|
||||
#pragma acc parallel async(acc_async_sync)
|
||||
;
|
||||
#pragma acc parallel async(async)
|
||||
;
|
||||
#pragma acc wait
|
||||
|
||||
void *cuda_stream_sync = acc_get_cuda_stream (acc_async_sync);
|
||||
assert (cuda_stream_sync == NULL);
|
||||
void *cuda_stream_async = acc_get_cuda_stream (async);
|
||||
assert (cuda_stream_async != NULL);
|
||||
int ret = acc_set_cuda_stream (acc_async_sync, cuda_stream_async);
|
||||
assert (ret == 0);
|
||||
void *cuda_stream_sync_ = acc_get_cuda_stream (acc_async_sync);
|
||||
assert (cuda_stream_sync_ == cuda_stream_sync);
|
||||
void *cuda_stream_async_ = acc_get_cuda_stream (async);
|
||||
assert (cuda_stream_async_ == cuda_stream_async);
|
||||
|
||||
#pragma acc parallel async(acc_async_sync)
|
||||
;
|
||||
#pragma acc parallel async(async)
|
||||
;
|
||||
#pragma acc wait
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-output "Refusing request to set CUDA stream associated with \"acc_async_sync\"" } */
|
97
libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c
Normal file
97
libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c
Normal file
@ -0,0 +1,97 @@
|
||||
/* Test mapping of async values to specific underlying queues. */
|
||||
|
||||
#undef NDEBUG
|
||||
#include <assert.h>
|
||||
#include <openacc.h>
|
||||
|
||||
/* This is implemented in terms of the "acc_get_cuda_stream" interface. */
|
||||
|
||||
struct
|
||||
{
|
||||
int async;
|
||||
void *cuda_stream;
|
||||
} queues[] = { { acc_async_sync, NULL },
|
||||
{ acc_async_noval, NULL },
|
||||
{ 0, NULL },
|
||||
{ 1, NULL },
|
||||
{ 2, NULL },
|
||||
{ 36, NULL },
|
||||
{ 1982, NULL } };
|
||||
const size_t queues_n = sizeof queues / sizeof queues[0];
|
||||
|
||||
int main(void)
|
||||
{
|
||||
/* Explicitly initialize: it's not clear whether the following OpenACC
|
||||
runtime library calls implicitly initialize;
|
||||
<https://github.com/OpenACC/openacc-spec/issues/102>. */
|
||||
acc_device_t d;
|
||||
#if defined ACC_DEVICE_TYPE_nvidia
|
||||
d = acc_device_nvidia;
|
||||
#elif defined ACC_DEVICE_TYPE_host
|
||||
d = acc_device_host;
|
||||
#else
|
||||
# error Not ported to this ACC_DEVICE_TYPE
|
||||
#endif
|
||||
acc_init (d);
|
||||
|
||||
for (size_t i = 0; i < queues_n; ++i)
|
||||
{
|
||||
/* Before actually being used, there are all NULL. */
|
||||
queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
|
||||
assert (queues[i].cuda_stream == NULL);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < queues_n; ++i)
|
||||
{
|
||||
/* Use the queue to initialize it. */
|
||||
#pragma acc parallel async(queues[i].async)
|
||||
;
|
||||
#pragma acc wait
|
||||
|
||||
/* Verify CUDA stream used. */
|
||||
queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
|
||||
#if defined ACC_DEVICE_TYPE_nvidia
|
||||
/* "acc_async_sync" maps to the NULL CUDA default stream. */
|
||||
if (queues[i].async == acc_async_sync)
|
||||
assert (queues[i].cuda_stream == NULL);
|
||||
else
|
||||
assert (queues[i].cuda_stream != NULL);
|
||||
#elif defined ACC_DEVICE_TYPE_host
|
||||
/* For "acc_device_host" there are no CUDA streams. */
|
||||
assert (queues[i].cuda_stream == NULL);
|
||||
#else
|
||||
# error Not ported to this ACC_DEVICE_TYPE
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Verify same results. */
|
||||
for (size_t i = 0; i < queues_n; ++i)
|
||||
{
|
||||
void *cuda_stream;
|
||||
|
||||
cuda_stream = acc_get_cuda_stream (queues[i].async);
|
||||
assert (cuda_stream == queues[i].cuda_stream);
|
||||
|
||||
#pragma acc parallel async(queues[i].async)
|
||||
;
|
||||
#pragma acc wait
|
||||
|
||||
cuda_stream = acc_get_cuda_stream (queues[i].async);
|
||||
assert (cuda_stream == queues[i].cuda_stream);
|
||||
}
|
||||
|
||||
/* Verify individual underlying queues are all different. */
|
||||
for (size_t i = 0; i < queues_n; ++i)
|
||||
{
|
||||
if (queues[i].cuda_stream == NULL)
|
||||
continue;
|
||||
for (size_t j = i + 1; j < queues_n; ++j)
|
||||
{
|
||||
if (queues[j].cuda_stream == NULL)
|
||||
continue;
|
||||
assert (queues[j].cuda_stream != queues[i].cuda_stream);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -7,6 +7,14 @@
|
||||
#include <openacc.h>
|
||||
#include <cuda.h>
|
||||
|
||||
#if !defined __cplusplus
|
||||
# undef static_assert
|
||||
# define static_assert _Static_assert
|
||||
#endif
|
||||
|
||||
static_assert (acc_async_sync == -2, "acc_async_sync?");
|
||||
static_assert (acc_async_noval == -1, "acc_async_noval?");
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
@ -20,9 +28,11 @@ main (int argc, char **argv)
|
||||
|
||||
(void) acc_get_device_num (acc_device_nvidia);
|
||||
|
||||
streams = (CUstream *) malloc (N * sizeof (void *));
|
||||
streams = (CUstream *) malloc ((2 + N) * sizeof (void *));
|
||||
streams += 2;
|
||||
/* "streams[i]" is valid for i in [acc_async_sync..N). */
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
for (i = acc_async_sync; i < N; i++)
|
||||
{
|
||||
streams[i] = (CUstream) acc_get_cuda_stream (i);
|
||||
if (streams[i] != NULL)
|
||||
@ -35,11 +45,20 @@ main (int argc, char **argv)
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (!acc_set_cuda_stream (i, streams[i]))
|
||||
abort ();
|
||||
int ret = acc_set_cuda_stream (i, streams[i]);
|
||||
if (i == acc_async_sync)
|
||||
{
|
||||
if (ret == 1)
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ret != 1)
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
for (i = acc_async_sync; i < N; i++)
|
||||
{
|
||||
int j;
|
||||
int cnt;
|
||||
@ -48,7 +67,7 @@ main (int argc, char **argv)
|
||||
|
||||
s = streams[i];
|
||||
|
||||
for (j = 0; j < N; j++)
|
||||
for (j = acc_async_sync; j < N; j++)
|
||||
{
|
||||
if (s == streams[j])
|
||||
cnt++;
|
||||
|
@ -7,6 +7,14 @@
|
||||
#include <stdio.h>
|
||||
#include <cuda.h>
|
||||
|
||||
#if !defined __cplusplus
|
||||
# undef static_assert
|
||||
# define static_assert _Static_assert
|
||||
#endif
|
||||
|
||||
static_assert (acc_async_sync == -2, "acc_async_sync?");
|
||||
static_assert (acc_async_noval == -1, "acc_async_noval?");
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
@ -20,9 +28,11 @@ main (int argc, char **argv)
|
||||
|
||||
(void) acc_get_device_num (acc_device_nvidia);
|
||||
|
||||
streams = (CUstream *) malloc (N * sizeof (void *));
|
||||
streams = (CUstream *) malloc ((2 + N) * sizeof (void *));
|
||||
streams += 2;
|
||||
/* "streams[i]" is valid for i in [acc_async_sync..N). */
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
for (i = acc_async_sync; i < N; i++)
|
||||
{
|
||||
streams[i] = (CUstream) acc_get_cuda_stream (i);
|
||||
if (streams[i] != NULL)
|
||||
@ -35,8 +45,17 @@ main (int argc, char **argv)
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (!acc_set_cuda_stream (i, streams[i]))
|
||||
abort ();
|
||||
int ret = acc_set_cuda_stream (i, streams[i]);
|
||||
if (i == acc_async_sync)
|
||||
{
|
||||
if (ret == 1)
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ret != 1)
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
||||
s = NULL;
|
||||
|
Loading…
x
Reference in New Issue
Block a user