mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-22 11:41:07 +08:00
mkoffload.c (process): Support variable mapping.
gcc/ * config/nvptx/mkoffload.c (process): Support variable mapping. libgomp/ * libgomp.h (target_mem_desc): Remove mem_map field. (acc_dispatch_t): Remove open_device_func, close_device_func, get_device_num_func, set_device_num_func, target_data members. Change create_thread_data_func argument to device number instead of generic pointer. * oacc-async.c (assert.h): Include. (acc_async_test, acc_async_test_all, acc_wait, acc_wait_async) (acc_wait_all, acc_wait_all_async): Use current host thread's active device, not base_dev. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-host.c (host_dispatch): Don't set open_device_func, close_device_func, get_device_num_func or set_device_num_func. * oacc-init.c (base_dev, init_key): Remove. (cached_base_dev): New. (name_of_acc_device_t): New. (acc_init_1): Initialise default-numbered device, not zeroth. (acc_shutdown_1): Close all devices of a given type. (goacc_destroy_thread): Don't use base_dev. (lazy_open, lazy_init, lazy_init_and_open): Remove. (goacc_attach_host_thread_to_device): New. (acc_init): Reimplement with goacc_attach_host_thread_to_device. (acc_get_num_devices): Don't use base_dev. (acc_set_device_type): Reimplement. (acc_get_device_type): Don't use base_dev. (acc_get_device_num): Tweak logic. (acc_set_device_num): Likewise. (acc_on_device): Use acc_get_device_type. (goacc_runtime_initialize): Initialize cached_base_dev not base_dev. (goacc_lazy_initialize): Reimplement with acc_init and goacc_attach_host_thread_to_device. * oacc-int.h (goacc_thread): Add base_dev field. (base_dev): Remove extern declaration. (goacc_attach_host_thread_to_device): Add prototype. * oacc-mem.c (acc_malloc): Use current thread's device instead of base_dev. (acc_free): Likewise. (acc_memcpy_to_device): Likewise. (acc_memcpy_from_device): Likewise. * oacc-parallel.c (select_acc_device): Remove. Replace calls with goacc_lazy_initialize (throughout). 
(GOACC_parallel): Use tgt_offset to locate target functions. * target.c (gomp_map_vars): Don't set tgt->mem_map. (gomp_unmap_vars): Use devicep->mem_map pointer not tgt->mem_map. (gomp_load_plugin_for_device): Remove open_device, close_device, get_device_num, set_device_num openacc hook initialisation. Don't set openacc.target_data. * plugin/plugin-host.c (GOMP_OFFLOAD_openacc_open_device) (GOMP_OFFLOAD_openacc_close_device) (GOMP_OFFLOAD_openacc_get_device_num) (GOMP_OFFLOAD_openacc_set_device_num): Remove. (GOMP_OFFLOAD_openacc_create_thread_data): Change (unused) argument to int. * plugin/plugin-nvptx.c (ptx_inited): Remove. (instantiated_devices, ptx_dev_lock): New. (struct ptx_image_data): New. (ptx_devices, ptx_images, ptx_image_lock): New. (fini_streams_for_device): Reorder cuStreamDestroy call. (nvptx_get_num_devices): Remove forward declaration. (nvptx_init): Change return type to bool. (nvptx_fini): Remove. (nvptx_attach_host_thread_to_device): New. (nvptx_open_device): Return struct ptx_device* instead of void*. (nvptx_close_device): Change argument type to struct ptx_device*, return type to void. (nvptx_get_num_devices): Use instantiated_devices not ptx_inited. (kernel_target_data, kernel_host_table): Remove static globals. (GOMP_OFFLOAD_register_image, GOMP_OFFLOAD_get_table): Remove. (GOMP_OFFLOAD_init_device): Reimplement. (GOMP_OFFLOAD_fini_device): Likewise. (GOMP_OFFLOAD_load_image, GOMP_OFFLOAD_unload_image): New. (GOMP_OFFLOAD_alloc, GOMP_OFFLOAD_free, GOMP_OFFLOAD_dev2host) (GOMP_OFFLOAD_host2dev): Use ORD argument. (GOMP_OFFLOAD_openacc_open_device) (GOMP_OFFLOAD_openacc_close_device) (GOMP_OFFLOAD_openacc_set_device_num) (GOMP_OFFLOAD_openacc_get_device_num): Remove. (GOMP_OFFLOAD_openacc_create_thread_data): Change argument to int (device number). libgomp/testsuite/ * libgomp.oacc-c-c++-common/lib-9.c: Fix devnum check in test. From-SVN: r221922
This commit is contained in:
parent
a6330e856f
commit
d93bdab53b
@ -1,3 +1,7 @@
|
||||
2015-04-08 Julian Brown <julian@codesourcery.com>
|
||||
|
||||
* config/nvptx/mkoffload.c (process): Support variable mapping.
|
||||
|
||||
2015-03-27 Trevor Saunders <tbsaunde@tbsaunde.org>
|
||||
|
||||
* config/alpha/alpha.c (alpha_use_linkage): Change type of slot to
|
||||
|
@ -839,6 +839,7 @@ process (FILE *in, FILE *out)
|
||||
{
|
||||
const char *input = read_file (in);
|
||||
Token *tok = tokenize (input);
|
||||
unsigned int nvars = 0, nfuncs = 0;
|
||||
|
||||
do
|
||||
tok = parse_file (tok);
|
||||
@ -850,16 +851,17 @@ process (FILE *in, FILE *out)
|
||||
write_stmts (out, rev_stmts (fns));
|
||||
fprintf (out, ";\n\n");
|
||||
fprintf (out, "static const char *var_mappings[] = {\n");
|
||||
for (id_map *id = var_ids; id; id = id->next)
|
||||
for (id_map *id = var_ids; id; id = id->next, nvars++)
|
||||
fprintf (out, "\t\"%s\"%s\n", id->ptx_name, id->next ? "," : "");
|
||||
fprintf (out, "};\n\n");
|
||||
fprintf (out, "static const char *func_mappings[] = {\n");
|
||||
for (id_map *id = func_ids; id; id = id->next)
|
||||
for (id_map *id = func_ids; id; id = id->next, nfuncs++)
|
||||
fprintf (out, "\t\"%s\"%s\n", id->ptx_name, id->next ? "," : "");
|
||||
fprintf (out, "};\n\n");
|
||||
|
||||
fprintf (out, "static const void *target_data[] = {\n");
|
||||
fprintf (out, " ptx_code, var_mappings, func_mappings\n");
|
||||
fprintf (out, " ptx_code, (void*) %u, var_mappings, (void*) %u, "
|
||||
"func_mappings\n", nvars, nfuncs);
|
||||
fprintf (out, "};\n\n");
|
||||
|
||||
fprintf (out, "extern void GOMP_offload_register (const void *, int, void *);\n");
|
||||
|
@ -1,3 +1,89 @@
|
||||
2015-04-08 Julian Brown <julian@codesourcery.com>
|
||||
|
||||
* libgomp.h (target_mem_desc): Remove mem_map field.
|
||||
(acc_dispatch_t): Remove open_device_func, close_device_func,
|
||||
get_device_num_func, set_device_num_func, target_data members.
|
||||
Change create_thread_data_func argument to device number instead of
|
||||
generic pointer.
|
||||
* oacc-async.c (assert.h): Include.
|
||||
(acc_async_test, acc_async_test_all, acc_wait, acc_wait_async)
|
||||
(acc_wait_all, acc_wait_all_async): Use current host thread's
|
||||
active device, not base_dev.
|
||||
* oacc-cuda.c (acc_get_current_cuda_device)
|
||||
(acc_get_current_cuda_context, acc_get_cuda_stream)
|
||||
(acc_set_cuda_stream): Likewise.
|
||||
* oacc-host.c (host_dispatch): Don't set open_device_func,
|
||||
close_device_func, get_device_num_func or set_device_num_func.
|
||||
* oacc-init.c (base_dev, init_key): Remove.
|
||||
(cached_base_dev): New.
|
||||
(name_of_acc_device_t): New.
|
||||
(acc_init_1): Initialise default-numbered device, not zeroth.
|
||||
(acc_shutdown_1): Close all devices of a given type.
|
||||
(goacc_destroy_thread): Don't use base_dev.
|
||||
(lazy_open, lazy_init, lazy_init_and_open): Remove.
|
||||
(goacc_attach_host_thread_to_device): New.
|
||||
(acc_init): Reimplement with goacc_attach_host_thread_to_device.
|
||||
(acc_get_num_devices): Don't use base_dev.
|
||||
(acc_set_device_type): Reimplement.
|
||||
(acc_get_device_type): Don't use base_dev.
|
||||
(acc_get_device_num): Tweak logic.
|
||||
(acc_set_device_num): Likewise.
|
||||
(acc_on_device): Use acc_get_device_type.
|
||||
(goacc_runtime_initialize): Initialize cached_base_dev not base_dev.
|
||||
(goacc_lazy_initialize): Reimplement with acc_init and
|
||||
goacc_attach_host_thread_to_device.
|
||||
* oacc-int.h (goacc_thread): Add base_dev field.
|
||||
(base_dev): Remove extern declaration.
|
||||
(goacc_attach_host_thread_to_device): Add prototype.
|
||||
* oacc-mem.c (acc_malloc): Use current thread's device instead of
|
||||
base_dev.
|
||||
(acc_free): Likewise.
|
||||
(acc_memcpy_to_device): Likewise.
|
||||
(acc_memcpy_from_device): Likewise.
|
||||
* oacc-parallel.c (select_acc_device): Remove. Replace calls with
|
||||
goacc_lazy_initialize (throughout).
|
||||
(GOACC_parallel): Use tgt_offset to locate target functions.
|
||||
* target.c (gomp_map_vars): Don't set tgt->mem_map.
|
||||
(gomp_unmap_vars): Use devicep->mem_map pointer not tgt->mem_map.
|
||||
(gomp_load_plugin_for_device): Remove open_device, close_device,
|
||||
get_device_num, set_device_num openacc hook initialisation. Don't set
|
||||
openacc.target_data.
|
||||
* plugin/plugin-host.c (GOMP_OFFLOAD_openacc_open_device)
|
||||
(GOMP_OFFLOAD_openacc_close_device)
|
||||
(GOMP_OFFLOAD_openacc_get_device_num)
|
||||
(GOMP_OFFLOAD_openacc_set_device_num): Remove.
|
||||
(GOMP_OFFLOAD_openacc_create_thread_data): Change (unused) argument
|
||||
to int.
|
||||
* plugin/plugin-nvptx.c (ptx_inited): Remove.
|
||||
(instantiated_devices, ptx_dev_lock): New.
|
||||
(struct ptx_image_data): New.
|
||||
(ptx_devices, ptx_images, ptx_image_lock): New.
|
||||
(fini_streams_for_device): Reorder cuStreamDestroy call.
|
||||
(nvptx_get_num_devices): Remove forward declaration.
|
||||
(nvptx_init): Change return type to bool.
|
||||
(nvptx_fini): Remove.
|
||||
(nvptx_attach_host_thread_to_device): New.
|
||||
(nvptx_open_device): Return struct ptx_device* instead of void*.
|
||||
(nvptx_close_device): Change argument type to struct ptx_device*,
|
||||
return type to void.
|
||||
(nvptx_get_num_devices): Use instantiated_devices not ptx_inited.
|
||||
(kernel_target_data, kernel_host_table): Remove static globals.
|
||||
(GOMP_OFFLOAD_register_image, GOMP_OFFLOAD_get_table): Remove.
|
||||
(GOMP_OFFLOAD_init_device): Reimplement.
|
||||
(GOMP_OFFLOAD_fini_device): Likewise.
|
||||
(GOMP_OFFLOAD_load_image, GOMP_OFFLOAD_unload_image): New.
|
||||
(GOMP_OFFLOAD_alloc, GOMP_OFFLOAD_free, GOMP_OFFLOAD_dev2host)
|
||||
(GOMP_OFFLOAD_host2dev): Use ORD argument.
|
||||
(GOMP_OFFLOAD_openacc_open_device)
|
||||
(GOMP_OFFLOAD_openacc_close_device)
|
||||
(GOMP_OFFLOAD_openacc_set_device_num)
|
||||
(GOMP_OFFLOAD_openacc_get_device_num): Remove.
|
||||
(GOMP_OFFLOAD_openacc_create_thread_data): Change argument to int
|
||||
(device number).
|
||||
|
||||
testsuite/
|
||||
* libgomp.oacc-c-c++-common/lib-9.c: Fix devnum check in test.
|
||||
|
||||
2015-04-06 Ilya Verbin <ilya.verbin@intel.com>
|
||||
|
||||
* libgomp-plugin.h (struct mapping_table): Replace with addr_pair.
|
||||
|
@ -655,9 +655,6 @@ struct target_mem_desc {
|
||||
/* Corresponding target device descriptor. */
|
||||
struct gomp_device_descr *device_descr;
|
||||
|
||||
/* Memory mapping info for the thread that created this descriptor. */
|
||||
struct splay_tree_s *mem_map;
|
||||
|
||||
/* List of splay keys to remove (or decrease refcount)
|
||||
at the end of region. */
|
||||
splay_tree_key list[];
|
||||
@ -691,18 +688,6 @@ typedef struct acc_dispatch_t
|
||||
/* This is guarded by the lock in the "outer" struct gomp_device_descr. */
|
||||
struct target_mem_desc *data_environ;
|
||||
|
||||
/* Extra information required for a device instance by a given target. */
|
||||
/* This is guarded by the lock in the "outer" struct gomp_device_descr. */
|
||||
void *target_data;
|
||||
|
||||
/* Open or close a device instance. */
|
||||
void *(*open_device_func) (int n);
|
||||
int (*close_device_func) (void *h);
|
||||
|
||||
/* Set or get the device number. */
|
||||
int (*get_device_num_func) (void);
|
||||
void (*set_device_num_func) (int);
|
||||
|
||||
/* Execute. */
|
||||
void (*exec_func) (void (*) (void *), size_t, void **, void **, size_t *,
|
||||
unsigned short *, int, int, int, int, void *);
|
||||
@ -720,7 +705,7 @@ typedef struct acc_dispatch_t
|
||||
void (*async_set_async_func) (int);
|
||||
|
||||
/* Create/destroy TLS data. */
|
||||
void *(*create_thread_data_func) (void *);
|
||||
void *(*create_thread_data_func) (int);
|
||||
void (*destroy_thread_data_func) (void *);
|
||||
|
||||
/* NVIDIA target specific routines. */
|
||||
|
@ -26,7 +26,7 @@
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include "openacc.h"
|
||||
#include "libgomp.h"
|
||||
#include "oacc-int.h"
|
||||
@ -37,13 +37,23 @@ acc_async_test (int async)
|
||||
if (async < acc_async_sync)
|
||||
gomp_fatal ("invalid async argument: %d", async);
|
||||
|
||||
return base_dev->openacc.async_test_func (async);
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
|
||||
return thr->dev->openacc.async_test_func (async);
|
||||
}
|
||||
|
||||
int
|
||||
acc_async_test_all (void)
|
||||
{
|
||||
return base_dev->openacc.async_test_all_func ();
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
|
||||
return thr->dev->openacc.async_test_all_func ();
|
||||
}
|
||||
|
||||
void
|
||||
@ -52,19 +62,34 @@ acc_wait (int async)
|
||||
if (async < acc_async_sync)
|
||||
gomp_fatal ("invalid async argument: %d", async);
|
||||
|
||||
base_dev->openacc.async_wait_func (async);
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
|
||||
thr->dev->openacc.async_wait_func (async);
|
||||
}
|
||||
|
||||
void
|
||||
acc_wait_async (int async1, int async2)
|
||||
{
|
||||
base_dev->openacc.async_wait_async_func (async1, async2);
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
|
||||
thr->dev->openacc.async_wait_async_func (async1, async2);
|
||||
}
|
||||
|
||||
void
|
||||
acc_wait_all (void)
|
||||
{
|
||||
base_dev->openacc.async_wait_all_func ();
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
|
||||
thr->dev->openacc.async_wait_all_func ();
|
||||
}
|
||||
|
||||
void
|
||||
@ -73,5 +98,10 @@ acc_wait_all_async (int async)
|
||||
if (async < acc_async_sync)
|
||||
gomp_fatal ("invalid async argument: %d", async);
|
||||
|
||||
base_dev->openacc.async_wait_all_async_func (async);
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
|
||||
thr->dev->openacc.async_wait_all_async_func (async);
|
||||
}
|
||||
|
@ -34,51 +34,53 @@
|
||||
void *
|
||||
acc_get_current_cuda_device (void)
|
||||
{
|
||||
void *p = NULL;
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (base_dev && base_dev->openacc.cuda.get_current_device_func)
|
||||
p = base_dev->openacc.cuda.get_current_device_func ();
|
||||
if (thr && thr->dev && thr->dev->openacc.cuda.get_current_device_func)
|
||||
return thr->dev->openacc.cuda.get_current_device_func ();
|
||||
|
||||
return p;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void *
|
||||
acc_get_current_cuda_context (void)
|
||||
{
|
||||
void *p = NULL;
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (base_dev && base_dev->openacc.cuda.get_current_context_func)
|
||||
p = base_dev->openacc.cuda.get_current_context_func ();
|
||||
|
||||
return p;
|
||||
if (thr && thr->dev && thr->dev->openacc.cuda.get_current_context_func)
|
||||
return thr->dev->openacc.cuda.get_current_context_func ();
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void *
|
||||
acc_get_cuda_stream (int async)
|
||||
{
|
||||
void *p = NULL;
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (async < 0)
|
||||
return p;
|
||||
return NULL;
|
||||
|
||||
if (base_dev && base_dev->openacc.cuda.get_stream_func)
|
||||
p = base_dev->openacc.cuda.get_stream_func (async);
|
||||
|
||||
return p;
|
||||
if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func)
|
||||
return thr->dev->openacc.cuda.get_stream_func (async);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int
|
||||
acc_set_cuda_stream (int async, void *stream)
|
||||
{
|
||||
int s = -1;
|
||||
struct goacc_thread *thr;
|
||||
|
||||
if (async < 0 || stream == NULL)
|
||||
return 0;
|
||||
|
||||
goacc_lazy_initialize ();
|
||||
|
||||
if (base_dev && base_dev->openacc.cuda.set_stream_func)
|
||||
s = base_dev->openacc.cuda.set_stream_func (async, stream);
|
||||
thr = goacc_thread ();
|
||||
|
||||
return s;
|
||||
if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func)
|
||||
return thr->dev->openacc.cuda.set_stream_func (async, stream);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
@ -53,16 +53,9 @@ static struct gomp_device_descr host_dispatch =
|
||||
.host2dev_func = GOMP_OFFLOAD_host2dev,
|
||||
.run_func = GOMP_OFFLOAD_run,
|
||||
|
||||
.mem_map.root = NULL,
|
||||
.is_initialized = false,
|
||||
|
||||
.openacc = {
|
||||
.open_device_func = GOMP_OFFLOAD_openacc_open_device,
|
||||
.close_device_func = GOMP_OFFLOAD_openacc_close_device,
|
||||
|
||||
.get_device_num_func = GOMP_OFFLOAD_openacc_get_device_num,
|
||||
.set_device_num_func = GOMP_OFFLOAD_openacc_set_device_num,
|
||||
|
||||
.exec_func = GOMP_OFFLOAD_openacc_parallel,
|
||||
|
||||
.register_async_cleanup_func
|
||||
|
@ -37,14 +37,13 @@
|
||||
|
||||
static gomp_mutex_t acc_device_lock;
|
||||
|
||||
/* The dispatch table for the current accelerator device. This is global, so
|
||||
you can only have one type of device open at any given time in a program.
|
||||
This is the "base" device in that several devices that use the same
|
||||
dispatch table may be active concurrently: this one (the "zeroth") is used
|
||||
for overall initialisation/shutdown, and other instances -- not necessarily
|
||||
including this one -- may be opened and closed once the base device has
|
||||
been initialized. */
|
||||
struct gomp_device_descr *base_dev;
|
||||
/* A cached version of the dispatcher for the global "current" accelerator type,
|
||||
e.g. used as the default when creating new host threads. This is the
|
||||
device-type equivalent of goacc_device_num (which specifies which device to
|
||||
use out of potentially several of the same type). If there are several
|
||||
devices of a given type, this points at the first one. */
|
||||
|
||||
static struct gomp_device_descr *cached_base_dev = NULL;
|
||||
|
||||
#if defined HAVE_TLS || defined USE_EMUTLS
|
||||
__thread struct goacc_thread *goacc_tls_data;
|
||||
@ -53,9 +52,6 @@ pthread_key_t goacc_tls_key;
|
||||
#endif
|
||||
static pthread_key_t goacc_cleanup_key;
|
||||
|
||||
/* Current dispatcher, and how it was initialized */
|
||||
static acc_device_t init_key = _ACC_device_hwm;
|
||||
|
||||
static struct goacc_thread *goacc_threads;
|
||||
static gomp_mutex_t goacc_thread_lock;
|
||||
|
||||
@ -94,6 +90,21 @@ get_openacc_name (const char *name)
|
||||
return name;
|
||||
}
|
||||
|
||||
/* Return a printable name for the OpenACC device type TYPE.  Each handled
   enumerator maps to a fixed string literal; any other value is reported
   through gomp_fatal.  NOTE(review): the default case has no return
   statement, so this presumably relies on gomp_fatal not returning --
   confirm against its declaration.  */
static const char *
|
||||
name_of_acc_device_t (enum acc_device_t type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case acc_device_none: return "none";
|
||||
case acc_device_default: return "default";
|
||||
case acc_device_host: return "host";
|
||||
case acc_device_host_nonshm: return "host_nonshm";
|
||||
case acc_device_not_host: return "not_host";
|
||||
case acc_device_nvidia: return "nvidia";
|
||||
default: gomp_fatal ("unknown device type %u", (unsigned) type);
|
||||
}
|
||||
}
|
||||
|
||||
static struct gomp_device_descr *
|
||||
resolve_device (acc_device_t d)
|
||||
{
|
||||
@ -159,22 +170,87 @@ resolve_device (acc_device_t d)
|
||||
static struct gomp_device_descr *
|
||||
acc_init_1 (acc_device_t d)
|
||||
{
|
||||
struct gomp_device_descr *acc_dev;
|
||||
struct gomp_device_descr *base_dev, *acc_dev;
|
||||
int ndevs;
|
||||
|
||||
acc_dev = resolve_device (d);
|
||||
base_dev = resolve_device (d);
|
||||
|
||||
if (!acc_dev || acc_dev->get_num_devices_func () <= 0)
|
||||
gomp_fatal ("device %u not supported", (unsigned)d);
|
||||
ndevs = base_dev->get_num_devices_func ();
|
||||
|
||||
if (!base_dev || ndevs <= 0 || goacc_device_num >= ndevs)
|
||||
gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
|
||||
|
||||
acc_dev = &base_dev[goacc_device_num];
|
||||
|
||||
if (acc_dev->is_initialized)
|
||||
gomp_fatal ("device already active");
|
||||
|
||||
/* We need to remember what we were initialized as, to check shutdown etc. */
|
||||
init_key = d;
|
||||
|
||||
gomp_init_device (acc_dev);
|
||||
|
||||
return acc_dev;
|
||||
return base_dev;
|
||||
}
|
||||
|
||||
/* Shut down the devices of type D.  The device descriptor for D is looked
   up with resolve_device; a fatal error is raised if there is none.  Then,
   while holding goacc_thread_lock, every entry on the goacc_threads list
   has its target-specific TLS data destroyed and, if it has a device, that
   device's memory map freed and its dev/base_dev pointers cleared.
   Finally gomp_fini_device is called on each initialized descriptor among
   the get_num_devices_func () entries starting at the resolved one; a
   fatal error is raised if none of them was initialized.  */
static void
|
||||
acc_shutdown_1 (acc_device_t d)
|
||||
{
|
||||
struct gomp_device_descr *base_dev;
|
||||
struct goacc_thread *walk;
|
||||
int ndevs, i;
|
||||
bool devices_active = false;
|
||||
|
||||
/* Get the base device for this device type. */
|
||||
base_dev = resolve_device (d);
|
||||
|
||||
if (!base_dev)
|
||||
gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
|
||||
|
||||
gomp_mutex_lock (&goacc_thread_lock);
|
||||
|
||||
/* Free target-specific TLS data and close all devices. */
|
||||
/* NOTE(review): this walk does not compare walk->base_dev with base_dev, so
   every registered thread is torn down through base_dev's
   destroy_thread_data_func hook, whatever device type it is attached to --
   confirm that this is intended.  */
for (walk = goacc_threads; walk != NULL; walk = walk->next)
|
||||
{
|
||||
if (walk->target_tls)
|
||||
base_dev->openacc.destroy_thread_data_func (walk->target_tls);
|
||||
|
||||
walk->target_tls = NULL;
|
||||
|
||||
/* This would mean the user is shutting down OpenACC in the middle of an
|
||||
"acc data" pragma. Likely not intentional. */
|
||||
if (walk->mapped_data)
|
||||
gomp_fatal ("shutdown in 'acc data' region");
|
||||
|
||||
/* Similarly, if this happens then user code has done something weird. */
|
||||
if (walk->saved_bound_dev)
|
||||
gomp_fatal ("shutdown during host fallback");
|
||||
|
||||
if (walk->dev)
|
||||
{
|
||||
/* The device's memory map is freed under that device's own lock.  */
gomp_mutex_lock (&walk->dev->lock);
|
||||
gomp_free_memmap (&walk->dev->mem_map);
|
||||
gomp_mutex_unlock (&walk->dev->lock);
|
||||
|
||||
/* Detach the thread from its device and device type.  */
walk->dev = NULL;
|
||||
walk->base_dev = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
gomp_mutex_unlock (&goacc_thread_lock);
|
||||
|
||||
ndevs = base_dev->get_num_devices_func ();
|
||||
|
||||
/* Close all the devices of this type that have been opened. */
|
||||
/* base_dev is indexed as an array of NDEVS descriptors here; presumably the
   descriptors of one device type are stored contiguously -- confirm.  */
for (i = 0; i < ndevs; i++)
|
||||
{
|
||||
struct gomp_device_descr *acc_dev = &base_dev[i];
|
||||
if (acc_dev->is_initialized)
|
||||
{
|
||||
devices_active = true;
|
||||
gomp_fini_device (acc_dev);
|
||||
}
|
||||
}
|
||||
|
||||
/* Reached only after the per-thread teardown above has already run.  */
if (!devices_active)
|
||||
gomp_fatal ("no device initialized");
|
||||
}
|
||||
|
||||
static struct goacc_thread *
|
||||
@ -207,9 +283,11 @@ goacc_destroy_thread (void *data)
|
||||
|
||||
if (thr)
|
||||
{
|
||||
if (base_dev && thr->target_tls)
|
||||
struct gomp_device_descr *acc_dev = thr->dev;
|
||||
|
||||
if (acc_dev && thr->target_tls)
|
||||
{
|
||||
base_dev->openacc.destroy_thread_data_func (thr->target_tls);
|
||||
acc_dev->openacc.destroy_thread_data_func (thr->target_tls);
|
||||
thr->target_tls = NULL;
|
||||
}
|
||||
|
||||
@ -236,53 +314,49 @@ goacc_destroy_thread (void *data)
|
||||
gomp_mutex_unlock (&goacc_thread_lock);
|
||||
}
|
||||
|
||||
/* Open the ORD'th device of the currently-active type (base_dev must be
|
||||
initialised before calling). If ORD is < 0, open the default-numbered
|
||||
device (set by the ACC_DEVICE_NUM environment variable or a call to
|
||||
acc_set_device_num), or leave any currently-opened device as is. "Opening"
|
||||
consists of calling the device's open_device_func hook, and setting up
|
||||
thread-local data (maybe allocating, then initializing with information
|
||||
pertaining to the newly-opened or previously-opened device). */
|
||||
/* Use the ORD'th device instance for the current host thread (or -1 for the
|
||||
current global default). The device (and the runtime) must be initialised
|
||||
before calling this function. */
|
||||
|
||||
static void
|
||||
lazy_open (int ord)
|
||||
void
|
||||
goacc_attach_host_thread_to_device (int ord)
|
||||
{
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
struct gomp_device_descr *acc_dev;
|
||||
|
||||
if (thr && thr->dev)
|
||||
{
|
||||
assert (ord < 0 || ord == thr->dev->target_id);
|
||||
return;
|
||||
}
|
||||
|
||||
assert (base_dev);
|
||||
|
||||
struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL;
|
||||
int num_devices;
|
||||
|
||||
if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0))
|
||||
return;
|
||||
|
||||
if (ord < 0)
|
||||
ord = goacc_device_num;
|
||||
|
||||
/* The OpenACC 2.0 spec leaves the runtime's behaviour when an out-of-range
|
||||
device is requested as implementation-defined (4.2 ACC_DEVICE_NUM).
|
||||
We choose to raise an error in such a case. */
|
||||
if (ord >= base_dev->get_num_devices_func ())
|
||||
gomp_fatal ("device %u does not exist", ord);
|
||||
|
||||
|
||||
/* Decide which type of device to use. If the current thread has a device
|
||||
type already (e.g. set by acc_set_device_type), use that, else use the
|
||||
global default. */
|
||||
if (thr && thr->base_dev)
|
||||
base_dev = thr->base_dev;
|
||||
else
|
||||
{
|
||||
assert (cached_base_dev);
|
||||
base_dev = cached_base_dev;
|
||||
}
|
||||
|
||||
num_devices = base_dev->get_num_devices_func ();
|
||||
if (num_devices <= 0 || ord >= num_devices)
|
||||
gomp_fatal ("device %u out of range", ord);
|
||||
|
||||
if (!thr)
|
||||
thr = goacc_new_thread ();
|
||||
|
||||
acc_dev = thr->dev = &base_dev[ord];
|
||||
|
||||
assert (acc_dev->target_id == ord);
|
||||
|
||||
|
||||
thr->base_dev = base_dev;
|
||||
thr->dev = acc_dev = &base_dev[ord];
|
||||
thr->saved_bound_dev = NULL;
|
||||
thr->mapped_data = NULL;
|
||||
|
||||
if (!acc_dev->openacc.target_data)
|
||||
acc_dev->openacc.target_data = acc_dev->openacc.open_device_func (ord);
|
||||
|
||||
|
||||
thr->target_tls
|
||||
= acc_dev->openacc.create_thread_data_func (acc_dev->openacc.target_data);
|
||||
|
||||
= acc_dev->openacc.create_thread_data_func (ord);
|
||||
|
||||
acc_dev->openacc.async_set_async_func (acc_async_sync);
|
||||
}
|
||||
|
||||
@ -292,74 +366,20 @@ lazy_open (int ord)
|
||||
void
|
||||
acc_init (acc_device_t d)
|
||||
{
|
||||
if (!base_dev)
|
||||
if (!cached_base_dev)
|
||||
gomp_init_targets_once ();
|
||||
|
||||
gomp_mutex_lock (&acc_device_lock);
|
||||
|
||||
base_dev = acc_init_1 (d);
|
||||
|
||||
lazy_open (-1);
|
||||
cached_base_dev = acc_init_1 (d);
|
||||
|
||||
gomp_mutex_unlock (&acc_device_lock);
|
||||
|
||||
goacc_attach_host_thread_to_device (-1);
|
||||
}
|
||||
|
||||
ialias (acc_init)
|
||||
|
||||
static void
|
||||
acc_shutdown_1 (acc_device_t d)
|
||||
{
|
||||
struct goacc_thread *walk;
|
||||
|
||||
/* We don't check whether d matches the actual device found, because
|
||||
OpenACC 2.0 (3.2.12) says the parameters to the init and this
|
||||
call must match (for the shutdown call anyway, it's silent on
|
||||
others). */
|
||||
|
||||
if (!base_dev)
|
||||
gomp_fatal ("no device initialized");
|
||||
if (d != init_key)
|
||||
gomp_fatal ("device %u(%u) is initialized",
|
||||
(unsigned) init_key, (unsigned) base_dev->type);
|
||||
|
||||
gomp_mutex_lock (&goacc_thread_lock);
|
||||
|
||||
/* Free target-specific TLS data and close all devices. */
|
||||
for (walk = goacc_threads; walk != NULL; walk = walk->next)
|
||||
{
|
||||
if (walk->target_tls)
|
||||
base_dev->openacc.destroy_thread_data_func (walk->target_tls);
|
||||
|
||||
walk->target_tls = NULL;
|
||||
|
||||
/* This would mean the user is shutting down OpenACC in the middle of an
|
||||
"acc data" pragma. Likely not intentional. */
|
||||
if (walk->mapped_data)
|
||||
gomp_fatal ("shutdown in 'acc data' region");
|
||||
|
||||
if (walk->dev)
|
||||
{
|
||||
void *target_data = walk->dev->openacc.target_data;
|
||||
if (walk->dev->openacc.close_device_func (target_data) < 0)
|
||||
gomp_fatal ("failed to close device");
|
||||
|
||||
walk->dev->openacc.target_data = target_data = NULL;
|
||||
|
||||
gomp_mutex_lock (&walk->dev->lock);
|
||||
gomp_free_memmap (&walk->dev->mem_map);
|
||||
gomp_mutex_unlock (&walk->dev->lock);
|
||||
|
||||
walk->dev = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
gomp_mutex_unlock (&goacc_thread_lock);
|
||||
|
||||
gomp_fini_device (base_dev);
|
||||
|
||||
base_dev = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
acc_shutdown (acc_device_t d)
|
||||
{
|
||||
@ -372,59 +392,16 @@ acc_shutdown (acc_device_t d)
|
||||
|
||||
ialias (acc_shutdown)
|
||||
|
||||
/* This function is called after plugins have been initialized. It deals with
|
||||
the "base" device, and is used to prepare the runtime for dealing with a
|
||||
number of such devices (as implemented by some particular plugin). If the
|
||||
argument device type D matches a previous call to the function, return the
|
||||
current base device, else shut the old device down and re-initialize with
|
||||
the new device type. */
|
||||
|
||||
static struct gomp_device_descr *
|
||||
lazy_init (acc_device_t d)
|
||||
{
|
||||
if (base_dev)
|
||||
{
|
||||
/* Re-initializing the same device, do nothing. */
|
||||
if (d == init_key)
|
||||
return base_dev;
|
||||
|
||||
acc_shutdown_1 (init_key);
|
||||
}
|
||||
|
||||
assert (!base_dev);
|
||||
|
||||
return acc_init_1 (d);
|
||||
}
|
||||
|
||||
/* Ensure that plugins are loaded, initialize and open the (default-numbered)
|
||||
device. */
|
||||
|
||||
static void
|
||||
lazy_init_and_open (acc_device_t d)
|
||||
{
|
||||
if (!base_dev)
|
||||
gomp_init_targets_once ();
|
||||
|
||||
gomp_mutex_lock (&acc_device_lock);
|
||||
|
||||
base_dev = lazy_init (d);
|
||||
|
||||
lazy_open (-1);
|
||||
|
||||
gomp_mutex_unlock (&acc_device_lock);
|
||||
}
|
||||
|
||||
int
|
||||
acc_get_num_devices (acc_device_t d)
|
||||
{
|
||||
int n = 0;
|
||||
const struct gomp_device_descr *acc_dev;
|
||||
struct gomp_device_descr *acc_dev;
|
||||
|
||||
if (d == acc_device_none)
|
||||
return 0;
|
||||
|
||||
if (!base_dev)
|
||||
gomp_init_targets_once ();
|
||||
gomp_init_targets_once ();
|
||||
|
||||
acc_dev = resolve_device (d);
|
||||
if (!acc_dev)
|
||||
@ -439,10 +416,39 @@ acc_get_num_devices (acc_device_t d)
|
||||
|
||||
ialias (acc_get_num_devices)
|
||||
|
||||
/* Set the device type for the current thread only (using the current global
|
||||
default device number), initialising that device if necessary. Also set the
|
||||
default device type for new threads to D. */
|
||||
|
||||
void
|
||||
acc_set_device_type (acc_device_t d)
|
||||
{
|
||||
lazy_init_and_open (d);
|
||||
struct gomp_device_descr *base_dev, *acc_dev;
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
gomp_mutex_lock (&acc_device_lock);
|
||||
|
||||
if (!cached_base_dev)
|
||||
gomp_init_targets_once ();
|
||||
|
||||
cached_base_dev = base_dev = resolve_device (d);
|
||||
acc_dev = &base_dev[goacc_device_num];
|
||||
|
||||
if (!acc_dev->is_initialized)
|
||||
gomp_init_device (acc_dev);
|
||||
|
||||
gomp_mutex_unlock (&acc_device_lock);
|
||||
|
||||
/* We're changing device type: invalidate the current thread's dev and
|
||||
base_dev pointers. */
|
||||
if (thr && thr->base_dev != base_dev)
|
||||
{
|
||||
thr->base_dev = thr->dev = NULL;
|
||||
if (thr->mapped_data)
|
||||
gomp_fatal ("acc_set_device_type in 'acc data' region");
|
||||
}
|
||||
|
||||
goacc_attach_host_thread_to_device (-1);
|
||||
}
|
||||
|
||||
ialias (acc_set_device_type)
|
||||
@ -451,10 +457,11 @@ acc_device_t
|
||||
acc_get_device_type (void)
|
||||
{
|
||||
acc_device_t res = acc_device_none;
|
||||
const struct gomp_device_descr *dev;
|
||||
struct gomp_device_descr *dev;
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (base_dev)
|
||||
res = acc_device_type (base_dev->type);
|
||||
if (thr && thr->base_dev)
|
||||
res = acc_device_type (thr->base_dev->type);
|
||||
else
|
||||
{
|
||||
gomp_init_targets_once ();
|
||||
@ -475,78 +482,65 @@ int
|
||||
acc_get_device_num (acc_device_t d)
|
||||
{
|
||||
const struct gomp_device_descr *dev;
|
||||
int num;
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (d >= _ACC_device_hwm)
|
||||
gomp_fatal ("device %u out of range", (unsigned)d);
|
||||
|
||||
if (!base_dev)
|
||||
if (!cached_base_dev)
|
||||
gomp_init_targets_once ();
|
||||
|
||||
dev = resolve_device (d);
|
||||
if (!dev)
|
||||
gomp_fatal ("no devices of type %u", d);
|
||||
gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
|
||||
|
||||
/* We might not have called lazy_open for this host thread yet, in which case
|
||||
the get_device_num_func hook will return -1. */
|
||||
num = dev->openacc.get_device_num_func ();
|
||||
if (num < 0)
|
||||
num = goacc_device_num;
|
||||
if (thr && thr->base_dev == dev && thr->dev)
|
||||
return thr->dev->target_id;
|
||||
|
||||
return num;
|
||||
return goacc_device_num;
|
||||
}
|
||||
|
||||
ialias (acc_get_device_num)
|
||||
|
||||
void
|
||||
acc_set_device_num (int n, acc_device_t d)
|
||||
acc_set_device_num (int ord, acc_device_t d)
|
||||
{
|
||||
const struct gomp_device_descr *dev;
|
||||
struct gomp_device_descr *base_dev, *acc_dev;
|
||||
int num_devices;
|
||||
|
||||
if (!base_dev)
|
||||
if (!cached_base_dev)
|
||||
gomp_init_targets_once ();
|
||||
|
||||
if (ord < 0)
|
||||
ord = goacc_device_num;
|
||||
|
||||
if ((int) d == 0)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* A device setting of zero sets all device types on the system to use
|
||||
the Nth instance of that device type. Only attempt it for initialized
|
||||
devices though. */
|
||||
for (i = acc_device_not_host + 1; i < _ACC_device_hwm; i++)
|
||||
{
|
||||
dev = resolve_device (d);
|
||||
if (dev && dev->is_initialized)
|
||||
dev->openacc.set_device_num_func (n);
|
||||
}
|
||||
|
||||
/* ...and for future calls to acc_init/acc_set_device_type, etc. */
|
||||
goacc_device_num = n;
|
||||
}
|
||||
/* Set whatever device is being used by the current host thread to use
|
||||
device instance ORD. It's unclear if this is supposed to affect other
|
||||
host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num). */
|
||||
goacc_attach_host_thread_to_device (ord);
|
||||
else
|
||||
{
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
gomp_mutex_lock (&acc_device_lock);
|
||||
|
||||
base_dev = lazy_init (d);
|
||||
cached_base_dev = base_dev = resolve_device (d);
|
||||
|
||||
num_devices = base_dev->get_num_devices_func ();
|
||||
|
||||
if (n >= num_devices)
|
||||
gomp_fatal ("device %u out of range", n);
|
||||
if (ord >= num_devices)
|
||||
gomp_fatal ("device %u out of range", ord);
|
||||
|
||||
/* If we're changing the device number, de-associate this thread with
|
||||
the device (but don't close the device, since it may be in use by
|
||||
other threads). */
|
||||
if (thr && thr->dev && n != thr->dev->target_id)
|
||||
thr->dev = NULL;
|
||||
acc_dev = &base_dev[ord];
|
||||
|
||||
lazy_open (n);
|
||||
if (!acc_dev->is_initialized)
|
||||
gomp_init_device (acc_dev);
|
||||
|
||||
gomp_mutex_unlock (&acc_device_lock);
|
||||
|
||||
goacc_attach_host_thread_to_device (ord);
|
||||
}
|
||||
|
||||
goacc_device_num = ord;
|
||||
}
|
||||
|
||||
ialias (acc_set_device_num)
|
||||
@ -554,10 +548,7 @@ ialias (acc_set_device_num)
|
||||
int
|
||||
acc_on_device (acc_device_t dev)
|
||||
{
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (thr && thr->dev
|
||||
&& acc_device_type (thr->dev->type) == acc_device_host_nonshm)
|
||||
if (acc_get_device_type () == acc_device_host_nonshm)
|
||||
return dev == acc_device_host_nonshm || dev == acc_device_not_host;
|
||||
|
||||
/* Just rely on the compiler builtin. */
|
||||
@ -577,7 +568,7 @@ goacc_runtime_initialize (void)
|
||||
|
||||
pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
|
||||
|
||||
base_dev = NULL;
|
||||
cached_base_dev = NULL;
|
||||
|
||||
goacc_threads = NULL;
|
||||
gomp_mutex_init (&goacc_thread_lock);
|
||||
@ -606,9 +597,8 @@ goacc_restore_bind (void)
|
||||
}
|
||||
|
||||
/* This is called from any OpenACC support function that may need to implicitly
|
||||
initialize the libgomp runtime. On exit all such initialization will have
|
||||
been done, and both the global ACC_dev and the per-host-thread ACC_memmap
|
||||
pointers will be valid. */
|
||||
initialize the libgomp runtime, either globally or from a new host thread.
|
||||
On exit "goacc_thread" will return a valid & populated thread block. */
|
||||
|
||||
attribute_hidden void
|
||||
goacc_lazy_initialize (void)
|
||||
@ -618,12 +608,8 @@ goacc_lazy_initialize (void)
|
||||
if (thr && thr->dev)
|
||||
return;
|
||||
|
||||
if (!base_dev)
|
||||
lazy_init_and_open (acc_device_default);
|
||||
if (!cached_base_dev)
|
||||
acc_init (acc_device_default);
|
||||
else
|
||||
{
|
||||
gomp_mutex_lock (&acc_device_lock);
|
||||
lazy_open (-1);
|
||||
gomp_mutex_unlock (&acc_device_lock);
|
||||
}
|
||||
goacc_attach_host_thread_to_device (-1);
|
||||
}
|
||||
|
@ -56,6 +56,9 @@ acc_device_type (enum offload_target_type type)
|
||||
|
||||
struct goacc_thread
|
||||
{
|
||||
/* The base device for the current thread. */
|
||||
struct gomp_device_descr *base_dev;
|
||||
|
||||
/* The device for the current thread. */
|
||||
struct gomp_device_descr *dev;
|
||||
|
||||
@ -89,10 +92,7 @@ goacc_thread (void)
|
||||
#endif
|
||||
|
||||
void goacc_register (struct gomp_device_descr *) __GOACC_NOTHROW;
|
||||
|
||||
/* Current dispatcher. */
|
||||
extern struct gomp_device_descr *base_dev;
|
||||
|
||||
void goacc_attach_host_thread_to_device (int);
|
||||
void goacc_runtime_initialize (void);
|
||||
void goacc_save_and_set_bind (acc_device_t);
|
||||
void goacc_restore_bind (void);
|
||||
|
@ -107,7 +107,9 @@ acc_malloc (size_t s)
|
||||
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
return base_dev->alloc_func (thr->dev->target_id, s);
|
||||
assert (thr->dev);
|
||||
|
||||
return thr->dev->alloc_func (thr->dev->target_id, s);
|
||||
}
|
||||
|
||||
/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
|
||||
@ -122,6 +124,8 @@ acc_free (void *d)
|
||||
if (!d)
|
||||
return;
|
||||
|
||||
assert (thr && thr->dev);
|
||||
|
||||
/* We don't have to call lazy open here, as the ptr value must have
|
||||
been returned by acc_malloc. It's not permitted to pass NULL in
|
||||
(unless you got that null from acc_malloc). */
|
||||
@ -134,7 +138,7 @@ acc_free (void *d)
|
||||
acc_unmap_data ((void *)(k->host_start + offset));
|
||||
}
|
||||
|
||||
base_dev->free_func (thr->dev->target_id, d);
|
||||
thr->dev->free_func (thr->dev->target_id, d);
|
||||
}
|
||||
|
||||
void
|
||||
@ -144,7 +148,9 @@ acc_memcpy_to_device (void *d, void *h, size_t s)
|
||||
been obtained from a routine that did that. */
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
base_dev->host2dev_func (thr->dev->target_id, d, h, s);
|
||||
assert (thr && thr->dev);
|
||||
|
||||
thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
|
||||
}
|
||||
|
||||
void
|
||||
@ -154,7 +160,9 @@ acc_memcpy_from_device (void *h, void *d, size_t s)
|
||||
been obtained from a routine that did that. */
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
base_dev->dev2host_func (thr->dev->target_id, h, d, s);
|
||||
assert (thr && thr->dev);
|
||||
|
||||
thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
|
||||
}
|
||||
|
||||
/* Return the device pointer that corresponds to host data H. Or NULL
|
||||
|
@ -49,32 +49,6 @@ find_pset (int pos, size_t mapnum, unsigned short *kinds)
|
||||
return kind == GOMP_MAP_TO_PSET;
|
||||
}
|
||||
|
||||
|
||||
/* Ensure that the target device for DEVICE_TYPE is initialised (and that
|
||||
plugins have been loaded if appropriate). The ACC_dev variable for the
|
||||
current thread will be set appropriately for the given device type on
|
||||
return. */
|
||||
|
||||
attribute_hidden void
|
||||
select_acc_device (int device_type)
|
||||
{
|
||||
goacc_lazy_initialize ();
|
||||
|
||||
if (device_type == GOMP_DEVICE_HOST_FALLBACK)
|
||||
return;
|
||||
|
||||
if (device_type == acc_device_none)
|
||||
device_type = acc_device_host;
|
||||
|
||||
if (device_type >= 0)
|
||||
{
|
||||
/* NOTE: this will go badly if the surrounding data environment is set up
|
||||
to use a different device type. We'll just have to trust that users
|
||||
know what they're doing... */
|
||||
acc_set_device_type (device_type);
|
||||
}
|
||||
}
|
||||
|
||||
static void goacc_wait (int async, int num_waits, va_list ap);
|
||||
|
||||
void
|
||||
@ -111,7 +85,7 @@ GOACC_parallel (int device, void (*fn) (void *),
|
||||
__FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds,
|
||||
async);
|
||||
#endif
|
||||
select_acc_device (device);
|
||||
goacc_lazy_initialize ();
|
||||
|
||||
thr = goacc_thread ();
|
||||
acc_dev = thr->dev;
|
||||
@ -151,7 +125,7 @@ GOACC_parallel (int device, void (*fn) (void *),
|
||||
if (tgt_fn_key == NULL)
|
||||
gomp_fatal ("target function wasn't mapped");
|
||||
|
||||
tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start;
|
||||
tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
|
||||
}
|
||||
else
|
||||
tgt_fn = (void (*)) fn;
|
||||
@ -195,7 +169,7 @@ GOACC_data_start (int device, size_t mapnum,
|
||||
__FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
|
||||
#endif
|
||||
|
||||
select_acc_device (device);
|
||||
goacc_lazy_initialize ();
|
||||
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
struct gomp_device_descr *acc_dev = thr->dev;
|
||||
@ -242,7 +216,7 @@ GOACC_enter_exit_data (int device, size_t mapnum,
|
||||
bool data_enter = false;
|
||||
size_t i;
|
||||
|
||||
select_acc_device (device);
|
||||
goacc_lazy_initialize ();
|
||||
|
||||
thr = goacc_thread ();
|
||||
acc_dev = thr->dev;
|
||||
@ -429,7 +403,7 @@ GOACC_update (int device, size_t mapnum,
|
||||
bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
|
||||
size_t i;
|
||||
|
||||
select_acc_device (device);
|
||||
goacc_lazy_initialize ();
|
||||
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
struct gomp_device_descr *acc_dev = thr->dev;
|
||||
|
@ -118,31 +118,6 @@ GOMP_OFFLOAD_unload_image (int n __attribute__ ((unused)),
|
||||
{
|
||||
}
|
||||
|
||||
STATIC void *
|
||||
GOMP_OFFLOAD_openacc_open_device (int n)
|
||||
{
|
||||
return (void *) (intptr_t) n;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
GOMP_OFFLOAD_openacc_close_device (void *hnd)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
GOMP_OFFLOAD_openacc_get_device_num (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
GOMP_OFFLOAD_openacc_set_device_num (int n)
|
||||
{
|
||||
if (n > 0)
|
||||
GOMP (fatal) ("device number %u out of range for host execution", n);
|
||||
}
|
||||
|
||||
STATIC void *
|
||||
GOMP_OFFLOAD_alloc (int n __attribute__ ((unused)), size_t s)
|
||||
{
|
||||
@ -254,7 +229,7 @@ GOMP_OFFLOAD_openacc_async_wait_all_async (int async __attribute__ ((unused)))
|
||||
}
|
||||
|
||||
STATIC void *
|
||||
GOMP_OFFLOAD_openacc_create_thread_data (void *targ_data
|
||||
GOMP_OFFLOAD_openacc_create_thread_data (int ord
|
||||
__attribute__ ((unused)))
|
||||
{
|
||||
return NULL;
|
||||
|
@ -133,7 +133,8 @@ struct targ_fn_descriptor
|
||||
const char *name;
|
||||
};
|
||||
|
||||
static bool ptx_inited = false;
|
||||
static unsigned int instantiated_devices = 0;
|
||||
static pthread_mutex_t ptx_dev_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
struct ptx_stream
|
||||
{
|
||||
@ -331,9 +332,21 @@ struct ptx_event
|
||||
struct ptx_event *next;
|
||||
};
|
||||
|
||||
struct ptx_image_data
|
||||
{
|
||||
void *target_data;
|
||||
CUmodule module;
|
||||
struct ptx_image_data *next;
|
||||
};
|
||||
|
||||
static pthread_mutex_t ptx_event_lock;
|
||||
static struct ptx_event *ptx_events;
|
||||
|
||||
static struct ptx_device **ptx_devices;
|
||||
|
||||
static struct ptx_image_data *ptx_images = NULL;
|
||||
static pthread_mutex_t ptx_image_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
#define _XSTR(s) _STR(s)
|
||||
#define _STR(s) #s
|
||||
|
||||
@ -450,8 +463,8 @@ fini_streams_for_device (struct ptx_device *ptx_dev)
|
||||
struct ptx_stream *s = ptx_dev->active_streams;
|
||||
ptx_dev->active_streams = ptx_dev->active_streams->next;
|
||||
|
||||
cuStreamDestroy (s->stream);
|
||||
map_fini (s);
|
||||
cuStreamDestroy (s->stream);
|
||||
free (s);
|
||||
}
|
||||
|
||||
@ -575,21 +588,21 @@ select_stream_for_async (int async, pthread_t thread, bool create,
|
||||
return stream;
|
||||
}
|
||||
|
||||
static int nvptx_get_num_devices (void);
|
||||
|
||||
/* Initialize the device. */
|
||||
static int
|
||||
/* Initialize the device. Return TRUE on success, else FALSE. PTX_DEV_LOCK
|
||||
should be locked on entry and remains locked on exit. */
|
||||
static bool
|
||||
nvptx_init (void)
|
||||
{
|
||||
CUresult r;
|
||||
int rc;
|
||||
int ndevs;
|
||||
|
||||
if (ptx_inited)
|
||||
return nvptx_get_num_devices ();
|
||||
if (instantiated_devices != 0)
|
||||
return true;
|
||||
|
||||
rc = verify_device_library ();
|
||||
if (rc < 0)
|
||||
return -1;
|
||||
return false;
|
||||
|
||||
r = cuInit (0);
|
||||
if (r != CUDA_SUCCESS)
|
||||
@ -599,22 +612,64 @@ nvptx_init (void)
|
||||
|
||||
pthread_mutex_init (&ptx_event_lock, NULL);
|
||||
|
||||
ptx_inited = true;
|
||||
r = cuDeviceGetCount (&ndevs);
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuDeviceGetCount error: %s", cuda_error (r));
|
||||
|
||||
return nvptx_get_num_devices ();
|
||||
ptx_devices = GOMP_PLUGIN_malloc_cleared (sizeof (struct ptx_device *)
|
||||
* ndevs);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Select the N'th PTX device for the current host thread. The device must
|
||||
have been previously opened before calling this function. */
|
||||
|
||||
static void
|
||||
nvptx_fini (void)
|
||||
nvptx_attach_host_thread_to_device (int n)
|
||||
{
|
||||
ptx_inited = false;
|
||||
CUdevice dev;
|
||||
CUresult r;
|
||||
struct ptx_device *ptx_dev;
|
||||
CUcontext thd_ctx;
|
||||
|
||||
r = cuCtxGetDevice (&dev);
|
||||
if (r != CUDA_SUCCESS && r != CUDA_ERROR_INVALID_CONTEXT)
|
||||
GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (r));
|
||||
|
||||
if (r != CUDA_ERROR_INVALID_CONTEXT && dev == n)
|
||||
return;
|
||||
else
|
||||
{
|
||||
CUcontext old_ctx;
|
||||
|
||||
ptx_dev = ptx_devices[n];
|
||||
assert (ptx_dev);
|
||||
|
||||
r = cuCtxGetCurrent (&thd_ctx);
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuCtxGetCurrent error: %s", cuda_error (r));
|
||||
|
||||
/* We don't necessarily have a current context (e.g. if it has been
|
||||
destroyed). Pop it if we do though. */
|
||||
if (thd_ctx != NULL)
|
||||
{
|
||||
r = cuCtxPopCurrent (&old_ctx);
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuCtxPopCurrent error: %s", cuda_error (r));
|
||||
}
|
||||
|
||||
r = cuCtxPushCurrent (ptx_dev->ctx);
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuCtxPushCurrent error: %s", cuda_error (r));
|
||||
}
|
||||
}
|
||||
|
||||
static void *
|
||||
static struct ptx_device *
|
||||
nvptx_open_device (int n)
|
||||
{
|
||||
struct ptx_device *ptx_dev;
|
||||
CUdevice dev;
|
||||
CUdevice dev, ctx_dev;
|
||||
CUresult r;
|
||||
int async_engines, pi;
|
||||
|
||||
@ -628,6 +683,21 @@ nvptx_open_device (int n)
|
||||
ptx_dev->dev = dev;
|
||||
ptx_dev->ctx_shared = false;
|
||||
|
||||
r = cuCtxGetDevice (&ctx_dev);
|
||||
if (r != CUDA_SUCCESS && r != CUDA_ERROR_INVALID_CONTEXT)
|
||||
GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (r));
|
||||
|
||||
if (r != CUDA_ERROR_INVALID_CONTEXT && ctx_dev != dev)
|
||||
{
|
||||
/* The current host thread has an active context for a different device.
|
||||
Detach it. */
|
||||
CUcontext old_ctx;
|
||||
|
||||
r = cuCtxPopCurrent (&old_ctx);
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuCtxPopCurrent error: %s", cuda_error (r));
|
||||
}
|
||||
|
||||
r = cuCtxGetCurrent (&ptx_dev->ctx);
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuCtxGetCurrent error: %s", cuda_error (r));
|
||||
@ -678,17 +748,16 @@ nvptx_open_device (int n)
|
||||
|
||||
init_streams_for_device (ptx_dev, async_engines);
|
||||
|
||||
return (void *) ptx_dev;
|
||||
return ptx_dev;
|
||||
}
|
||||
|
||||
static int
|
||||
nvptx_close_device (void *targ_data)
|
||||
static void
|
||||
nvptx_close_device (struct ptx_device *ptx_dev)
|
||||
{
|
||||
CUresult r;
|
||||
struct ptx_device *ptx_dev = targ_data;
|
||||
|
||||
if (!ptx_dev)
|
||||
return 0;
|
||||
return;
|
||||
|
||||
fini_streams_for_device (ptx_dev);
|
||||
|
||||
@ -700,8 +769,6 @@ nvptx_close_device (void *targ_data)
|
||||
}
|
||||
|
||||
free (ptx_dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
@ -714,7 +781,7 @@ nvptx_get_num_devices (void)
|
||||
order to enumerate available devices, but CUDA API routines can't be used
|
||||
until cuInit has been called. Just call it now (but don't yet do any
|
||||
further initialization). */
|
||||
if (!ptx_inited)
|
||||
if (instantiated_devices == 0)
|
||||
cuInit (0);
|
||||
|
||||
r = cuDeviceGetCount (&n);
|
||||
@ -1507,64 +1574,84 @@ GOMP_OFFLOAD_get_num_devices (void)
|
||||
return nvptx_get_num_devices ();
|
||||
}
|
||||
|
||||
static void **kernel_target_data;
|
||||
static void **kernel_host_table;
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_register_image (void *host_table, void *target_data)
|
||||
GOMP_OFFLOAD_init_device (int n)
|
||||
{
|
||||
kernel_target_data = target_data;
|
||||
kernel_host_table = host_table;
|
||||
pthread_mutex_lock (&ptx_dev_lock);
|
||||
|
||||
if (!nvptx_init () || ptx_devices[n] != NULL)
|
||||
{
|
||||
pthread_mutex_unlock (&ptx_dev_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
ptx_devices[n] = nvptx_open_device (n);
|
||||
instantiated_devices++;
|
||||
|
||||
pthread_mutex_unlock (&ptx_dev_lock);
|
||||
}
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_init_device (int n __attribute__ ((unused)))
|
||||
GOMP_OFFLOAD_fini_device (int n)
|
||||
{
|
||||
(void) nvptx_init ();
|
||||
}
|
||||
pthread_mutex_lock (&ptx_dev_lock);
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_fini_device (int n __attribute__ ((unused)))
|
||||
{
|
||||
nvptx_fini ();
|
||||
if (ptx_devices[n] != NULL)
|
||||
{
|
||||
nvptx_attach_host_thread_to_device (n);
|
||||
nvptx_close_device (ptx_devices[n]);
|
||||
ptx_devices[n] = NULL;
|
||||
instantiated_devices--;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock (&ptx_dev_lock);
|
||||
}
|
||||
|
||||
int
|
||||
GOMP_OFFLOAD_get_table (int n __attribute__ ((unused)),
|
||||
struct mapping_table **tablep)
|
||||
GOMP_OFFLOAD_load_image (int ord, void *target_data,
|
||||
struct addr_pair **target_table)
|
||||
{
|
||||
CUmodule module;
|
||||
void **fn_table;
|
||||
char **fn_names;
|
||||
int fn_entries, i;
|
||||
char **fn_names, **var_names;
|
||||
unsigned int fn_entries, var_entries, i, j;
|
||||
CUresult r;
|
||||
struct targ_fn_descriptor *targ_fns;
|
||||
void **img_header = (void **) target_data;
|
||||
struct ptx_image_data *new_image;
|
||||
|
||||
if (nvptx_init () <= 0)
|
||||
return 0;
|
||||
GOMP_OFFLOAD_init_device (ord);
|
||||
|
||||
/* This isn't an error, because an image may legitimately have no offloaded
|
||||
regions and so will not call GOMP_offload_register. */
|
||||
if (kernel_target_data == NULL)
|
||||
return 0;
|
||||
nvptx_attach_host_thread_to_device (ord);
|
||||
|
||||
link_ptx (&module, kernel_target_data[0]);
|
||||
link_ptx (&module, img_header[0]);
|
||||
|
||||
/* kernel_target_data[0] -> ptx code
|
||||
kernel_target_data[1] -> variable mappings
|
||||
kernel_target_data[2] -> array of kernel names in ascii
|
||||
pthread_mutex_lock (&ptx_image_lock);
|
||||
new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
|
||||
new_image->target_data = target_data;
|
||||
new_image->module = module;
|
||||
new_image->next = ptx_images;
|
||||
ptx_images = new_image;
|
||||
pthread_mutex_unlock (&ptx_image_lock);
|
||||
|
||||
kernel_host_table[0] -> start of function addresses (__offload_func_table)
|
||||
kernel_host_table[1] -> end of function addresses (__offload_funcs_end)
|
||||
/* The mkoffload utility emits a table of pointers/integers at the start of
|
||||
each offload image:
|
||||
|
||||
img_header[0] -> ptx code
|
||||
img_header[1] -> number of variables
|
||||
img_header[2] -> array of variable names (pointers to strings)
|
||||
img_header[3] -> number of kernels
|
||||
img_header[4] -> array of kernel names (pointers to strings)
|
||||
|
||||
The array of kernel names and the functions addresses form a
|
||||
one-to-one correspondence. */
|
||||
|
||||
fn_table = kernel_host_table[0];
|
||||
fn_names = (char **) kernel_target_data[2];
|
||||
fn_entries = (kernel_host_table[1] - kernel_host_table[0]) / sizeof (void *);
|
||||
var_entries = (uintptr_t) img_header[1];
|
||||
var_names = (char **) img_header[2];
|
||||
fn_entries = (uintptr_t) img_header[3];
|
||||
fn_names = (char **) img_header[4];
|
||||
|
||||
*tablep = GOMP_PLUGIN_malloc (sizeof (struct mapping_table) * fn_entries);
|
||||
*target_table = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
|
||||
* (fn_entries + var_entries));
|
||||
targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor)
|
||||
* fn_entries);
|
||||
|
||||
@ -1579,38 +1666,86 @@ GOMP_OFFLOAD_get_table (int n __attribute__ ((unused)),
|
||||
targ_fns[i].fn = function;
|
||||
targ_fns[i].name = (const char *) fn_names[i];
|
||||
|
||||
(*tablep)[i].host_start = (uintptr_t) fn_table[i];
|
||||
(*tablep)[i].host_end = (*tablep)[i].host_start + 1;
|
||||
(*tablep)[i].tgt_start = (uintptr_t) &targ_fns[i];
|
||||
(*tablep)[i].tgt_end = (*tablep)[i].tgt_start + 1;
|
||||
(*target_table)[i].start = (uintptr_t) &targ_fns[i];
|
||||
(*target_table)[i].end = (*target_table)[i].start + 1;
|
||||
}
|
||||
|
||||
return fn_entries;
|
||||
for (j = 0; j < var_entries; j++, i++)
|
||||
{
|
||||
CUdeviceptr var;
|
||||
size_t bytes;
|
||||
|
||||
r = cuModuleGetGlobal (&var, &bytes, module, var_names[j]);
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (r));
|
||||
|
||||
(*target_table)[i].start = (uintptr_t) var;
|
||||
(*target_table)[i].end = (*target_table)[i].start + bytes;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_unload_image (int tid __attribute__((unused)), void *target_data)
|
||||
{
|
||||
void **img_header = (void **) target_data;
|
||||
struct targ_fn_descriptor *targ_fns
|
||||
= (struct targ_fn_descriptor *) img_header[0];
|
||||
struct ptx_image_data *image, *prev = NULL, *newhd = NULL;
|
||||
|
||||
free (targ_fns);
|
||||
|
||||
pthread_mutex_lock (&ptx_image_lock);
|
||||
for (image = ptx_images; image != NULL;)
|
||||
{
|
||||
struct ptx_image_data *next = image->next;
|
||||
|
||||
if (image->target_data == target_data)
|
||||
{
|
||||
cuModuleUnload (image->module);
|
||||
free (image);
|
||||
if (prev)
|
||||
prev->next = next;
|
||||
}
|
||||
else
|
||||
{
|
||||
prev = image;
|
||||
if (!newhd)
|
||||
newhd = image;
|
||||
}
|
||||
|
||||
image = next;
|
||||
}
|
||||
ptx_images = newhd;
|
||||
pthread_mutex_unlock (&ptx_image_lock);
|
||||
}
|
||||
|
||||
void *
|
||||
GOMP_OFFLOAD_alloc (int n __attribute__ ((unused)), size_t size)
|
||||
GOMP_OFFLOAD_alloc (int ord, size_t size)
|
||||
{
|
||||
nvptx_attach_host_thread_to_device (ord);
|
||||
return nvptx_alloc (size);
|
||||
}
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_free (int n __attribute__ ((unused)), void *ptr)
|
||||
GOMP_OFFLOAD_free (int ord, void *ptr)
|
||||
{
|
||||
nvptx_attach_host_thread_to_device (ord);
|
||||
nvptx_free (ptr);
|
||||
}
|
||||
|
||||
void *
|
||||
GOMP_OFFLOAD_dev2host (int ord __attribute__ ((unused)), void *dst,
|
||||
const void *src, size_t n)
|
||||
GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
|
||||
{
|
||||
nvptx_attach_host_thread_to_device (ord);
|
||||
return nvptx_dev2host (dst, src, n);
|
||||
}
|
||||
|
||||
void *
|
||||
GOMP_OFFLOAD_host2dev (int ord __attribute__ ((unused)), void *dst,
|
||||
const void *src, size_t n)
|
||||
GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
|
||||
{
|
||||
nvptx_attach_host_thread_to_device (ord);
|
||||
return nvptx_host2dev (dst, src, n);
|
||||
}
|
||||
|
||||
@ -1627,45 +1762,6 @@ GOMP_OFFLOAD_openacc_parallel (void (*fn) (void *), size_t mapnum,
|
||||
num_workers, vector_length, async, targ_mem_desc);
|
||||
}
|
||||
|
||||
void *
|
||||
GOMP_OFFLOAD_openacc_open_device (int n)
|
||||
{
|
||||
return nvptx_open_device (n);
|
||||
}
|
||||
|
||||
int
|
||||
GOMP_OFFLOAD_openacc_close_device (void *h)
|
||||
{
|
||||
return nvptx_close_device (h);
|
||||
}
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_openacc_set_device_num (int n)
|
||||
{
|
||||
struct nvptx_thread *nvthd = nvptx_thread ();
|
||||
|
||||
assert (n >= 0);
|
||||
|
||||
if (!nvthd->ptx_dev || nvthd->ptx_dev->ord != n)
|
||||
(void) nvptx_open_device (n);
|
||||
}
|
||||
|
||||
/* This can be called before the device is "opened" for the current thread, in
|
||||
which case we can't tell which device number should be returned. We don't
|
||||
actually want to open the device here, so just return -1 and let the caller
|
||||
(oacc-init.c:acc_get_device_num) handle it. */
|
||||
|
||||
int
|
||||
GOMP_OFFLOAD_openacc_get_device_num (void)
|
||||
{
|
||||
struct nvptx_thread *nvthd = nvptx_thread ();
|
||||
|
||||
if (nvthd && nvthd->ptx_dev)
|
||||
return nvthd->ptx_dev->ord;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_openacc_register_async_cleanup (void *targ_mem_desc)
|
||||
{
|
||||
@ -1729,14 +1825,18 @@ GOMP_OFFLOAD_openacc_async_set_async (int async)
|
||||
}
|
||||
|
||||
void *
|
||||
GOMP_OFFLOAD_openacc_create_thread_data (void *targ_data)
|
||||
GOMP_OFFLOAD_openacc_create_thread_data (int ord)
|
||||
{
|
||||
struct ptx_device *ptx_dev = (struct ptx_device *) targ_data;
|
||||
struct ptx_device *ptx_dev;
|
||||
struct nvptx_thread *nvthd
|
||||
= GOMP_PLUGIN_malloc (sizeof (struct nvptx_thread));
|
||||
CUresult r;
|
||||
CUcontext thd_ctx;
|
||||
|
||||
ptx_dev = ptx_devices[ord];
|
||||
|
||||
assert (ptx_dev);
|
||||
|
||||
r = cuCtxGetCurrent (&thd_ctx);
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuCtxGetCurrent error: %s", cuda_error (r));
|
||||
|
@ -178,7 +178,6 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
tgt->list_count = mapnum;
|
||||
tgt->refcount = 1;
|
||||
tgt->device_descr = devicep;
|
||||
tgt->mem_map = mem_map;
|
||||
|
||||
if (mapnum == 0)
|
||||
return tgt;
|
||||
@ -597,7 +596,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
|
||||
devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
|
||||
(void *) (k->tgt->tgt_start + k->tgt_offset),
|
||||
k->host_end - k->host_start);
|
||||
splay_tree_remove (tgt->mem_map, k);
|
||||
splay_tree_remove (&devicep->mem_map, k);
|
||||
if (k->tgt->refcount > 1)
|
||||
k->tgt->refcount--;
|
||||
else
|
||||
@ -1159,10 +1158,6 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
|
||||
{
|
||||
optional_present = optional_total = 0;
|
||||
DLSYM_OPT (openacc.exec, openacc_parallel);
|
||||
DLSYM_OPT (openacc.open_device, openacc_open_device);
|
||||
DLSYM_OPT (openacc.close_device, openacc_close_device);
|
||||
DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
|
||||
DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
|
||||
DLSYM_OPT (openacc.register_async_cleanup,
|
||||
openacc_register_async_cleanup);
|
||||
DLSYM_OPT (openacc.async_test, openacc_async_test);
|
||||
@ -1271,7 +1266,6 @@ gomp_target_init (void)
|
||||
current_device.mem_map.root = NULL;
|
||||
current_device.is_initialized = false;
|
||||
current_device.openacc.data_environ = NULL;
|
||||
current_device.openacc.target_data = NULL;
|
||||
for (i = 0; i < new_num_devices; i++)
|
||||
{
|
||||
current_device.target_id = i;
|
||||
|
@ -58,7 +58,7 @@ main (int argc, char **argv)
|
||||
acc_set_device_num (1, (acc_device_t) 0);
|
||||
|
||||
devnum = acc_get_device_num (devtype);
|
||||
if (devnum != 0)
|
||||
if (devnum != 1)
|
||||
abort ();
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user