mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-01-25 23:54:58 +08:00
ea4b23d9c8
This commit enabled reverse offload for nvptx such that gomp_target_rev actually gets called. It also fills in the latter function to do all of the following: finding the host function for the device function pointer and copying the arguments to the host, processing the mapping/firstprivate, calling the host function, copying back the data and freeing as needed. The data handling is made easier by assuming that all host variables either existed before (and are in the mapping) or that those are device variables not yet available on the host. Thus, the reverse mapping can do without refcounts etc. Note that, inside a target region, the spec disallows device-affecting constructs other than target with the ancestor device-modifier, and it also limits the clauses permitted on this construct. For the function addresses, an additional splay tree is used; for the lookup of mapped variables, the existing splay-tree is used. Unfortunately, its data structure requires a full walk of the tree. Additionally, the just-mapped variables are recorded in a separate data structure, which requires an extra lookup. While the lookup is slow, assuming that only a few variables get mapped in each reverse offload construct and that reverse offload is the exception and not performance critical, this seems to be acceptable. libgomp/ChangeLog: * libgomp.h (struct target_mem_desc): Predeclare; move below after 'reverse_splay_tree_node' and add rev_array member. (struct reverse_splay_tree_key_s, reverse_splay_compare): New. (reverse_splay_tree_node, reverse_splay_tree, reverse_splay_tree_key): New typedef. (struct gomp_device_descr): Add mem_map_rev member. * oacc-host.c (host_dispatch): NULL init .mem_map_rev. * plugin/plugin-nvptx.c (GOMP_OFFLOAD_get_num_devices): Claim support for GOMP_REQUIRES_REVERSE_OFFLOAD. * splay-tree.h (splay_tree_callback_stop): New typedef; like splay_tree_callback but returning int not void. (splay_tree_foreach_lazy): Define; like splay_tree_foreach but taking splay_tree_callback_stop as argument. 
* splay-tree.c (splay_tree_foreach_internal_lazy, splay_tree_foreach_lazy): New; but early exit if callback returns nonzero. * target.c: Instantiate splay_tree_c with splay_tree_prefix 'reverse'. (gomp_map_lookup_rev): New. (gomp_load_image_to_device): Handle reverse-offload function lookup table. (gomp_unload_image_from_device): Free devicep->mem_map_rev. (struct gomp_splay_tree_rev_lookup_data, gomp_splay_tree_rev_lookup, gomp_map_rev_lookup, struct cpy_data, gomp_map_cdata_lookup_int, gomp_map_cdata_lookup): New auxiliary structs and functions for gomp_target_rev. (gomp_target_rev): Implement reverse offloading and its mapping. (gomp_target_init): Init current_device.mem_map_rev.root. * testsuite/libgomp.fortran/reverse-offload-2.f90: New test. * testsuite/libgomp.fortran/reverse-offload-3.f90: New test. * testsuite/libgomp.fortran/reverse-offload-4.f90: New test. * testsuite/libgomp.fortran/reverse-offload-5.f90: New test. * testsuite/libgomp.fortran/reverse-offload-5a.f90: New test without mapping of on-device allocated variables.
139 lines
5.2 KiB
C
139 lines
5.2 KiB
C
/* A splay-tree datatype.
|
|
Copyright (C) 1998-2022 Free Software Foundation, Inc.
|
|
Contributed by Mark Mitchell (mark@markmitchell.com).
|
|
|
|
This file is part of the GNU Offloading and Multi Processing Library
|
|
(libgomp).
|
|
|
|
Libgomp is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3, or (at your option)
|
|
any later version.
|
|
|
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
more details.
|
|
|
|
Under Section 7 of GPL version 3, you are granted additional
|
|
permissions described in the GCC Runtime Library Exception, version
|
|
3.1, as published by the Free Software Foundation.
|
|
|
|
You should have received a copy of the GNU General Public License and
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
/* The splay tree code copied from include/splay-tree.h and adjusted,
|
|
so that all the data lives directly in splay_tree_node_s structure
|
|
and no extra allocations are needed.
|
|
|
|
Files including this header should before including it add:
|
|
typedef struct splay_tree_node_s *splay_tree_node;
|
|
typedef struct splay_tree_s *splay_tree;
|
|
typedef struct splay_tree_key_s *splay_tree_key;
|
|
define splay_tree_key_s structure, and define
|
|
splay_compare inline function.
|
|
|
|
Alternatively, they can define splay_tree_prefix macro before
|
|
including this header and then all the above types, the
|
|
splay_compare function and the splay_tree_{lookup,insert,remove}
|
|
function will be prefixed by that prefix. If splay_tree_prefix
|
|
macro is defined, this header must be included twice: once where
|
|
you need the header file definitions, and once where you need the
|
|
.c implementation routines. In the latter case, you must also
|
|
define the macro splay_tree_c. See the include of splay-tree.h in
|
|
priority_queue.[hc] for an example. */
|
|
|
|
/* For an easily readable description of splay-trees, see:
|
|
|
|
Lewis, Harry R. and Denenberg, Larry. Data Structures and Their
|
|
Algorithms. Harper-Collins, Inc. 1991.
|
|
|
|
The major feature of splay trees is that all basic tree operations
|
|
are amortized O(log n) time for a tree with n nodes. */
|
|
|
|
/* Optional renaming layer.  When the includer defines splay_tree_prefix,
   each type, function and callback name used by this header is redirected
   to <prefix>_<name>, allowing more than one instantiation of the splay
   tree in the same program.  */
#ifdef splay_tree_prefix
|
|
/* Two-level pasting: going through splay_tree_name first lets the
   splay_tree_prefix argument be macro-expanded before ## glues the
   tokens together in splay_tree_name_1.  */
# define splay_tree_name_1(prefix, name) prefix ## _ ## name
|
|
# define splay_tree_name(prefix, name) splay_tree_name_1 (prefix, name)
|
|
/* Struct tags.  */
# define splay_tree_node_s \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_node_s)
|
|
# define splay_tree_s \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_s)
|
|
# define splay_tree_key_s \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_key_s)
|
|
/* Pointer typedef names.  */
# define splay_tree_node \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_node)
|
|
# define splay_tree \
|
|
splay_tree_name (splay_tree_prefix, splay_tree)
|
|
# define splay_tree_key \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_key)
|
|
/* Comparison function supplied by the includer.  */
# define splay_compare \
|
|
splay_tree_name (splay_tree_prefix, splay_compare)
|
|
/* Tree operations declared further down in this header.  */
# define splay_tree_lookup \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_lookup)
|
|
# define splay_tree_insert \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_insert)
|
|
# define splay_tree_remove \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_remove)
|
|
# define splay_tree_foreach \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_foreach)
|
|
# define splay_tree_foreach_lazy \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_foreach_lazy)
|
|
/* Callback function-pointer typedef names.  */
# define splay_tree_callback \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_callback)
|
|
# define splay_tree_callback_stop \
|
|
splay_tree_name (splay_tree_prefix, splay_tree_callback_stop)
|
|
#endif
|
|
|
|
#ifndef splay_tree_c
|
|
/* Header file definitions and prototypes. */
|
|
|
|
/* A node of the splay tree.  The key is stored by value inside the node
   itself, so no separate allocation is needed for it.  */
|
|
struct splay_tree_node_s {
|
|
/* Key data embedded directly in the node.  struct splay_tree_key_s is
   not defined here; the including file must provide it.  */
struct splay_tree_key_s key;
|
|
/* The left and right children, respectively.  */
|
|
splay_tree_node left;
|
|
splay_tree_node right;
|
|
};
|
|
|
|
/* The splay tree itself: a handle that holds nothing but the root node
   pointer.  */
|
|
struct splay_tree_s {
|
|
/* Topmost node of the tree.
   NOTE(review): presumably NULL when the tree is empty -- confirm
   against splay-tree.c.  */
splay_tree_node root;
|
|
};
|
|
|
|
/* Callback type taken by splay_tree_foreach: receives a key and the
   caller's opaque data pointer, and returns nothing.  */
typedef void (*splay_tree_callback) (splay_tree_key, void *);
|
|
/* Callback type taken by splay_tree_foreach_lazy: same arguments as
   splay_tree_callback, but returns an int instead of void.  */
typedef int (*splay_tree_callback_stop) (splay_tree_key, void *);
|
|
|
|
/* Tree operations.  These are only declared here; when splay_tree_prefix
   is defined, the names below are macros expanding to the prefixed
   identifiers.  */

/* Look up a key in the given tree; the result is a splay_tree_key.
   NOTE(review): presumably NULL when no key matches -- confirm against
   splay-tree.c.  */
extern splay_tree_key splay_tree_lookup (splay_tree, splay_tree_key);
|
|
/* Insert a caller-provided node into the tree.  */
extern void splay_tree_insert (splay_tree, splay_tree_node);
|
|
/* Remove the entry identified by the given key from the tree.  */
extern void splay_tree_remove (splay_tree, splay_tree_key);
|
|
/* Traverse the tree with a splay_tree_callback; the trailing void * is
   presumably handed to the callback unchanged -- confirm against
   splay-tree.c.  */
extern void splay_tree_foreach (splay_tree, splay_tree_callback, void *);
|
|
/* Like splay_tree_foreach, but takes a splay_tree_callback_stop, whose
   int result allows the callback to signal the traversal.
   NOTE(review): per the accompanying ChangeLog the walk exits early when
   the callback returns nonzero -- confirm against splay-tree.c.  */
extern void splay_tree_foreach_lazy (splay_tree, splay_tree_callback_stop, void *);
|
|
#else /* splay_tree_c */
|
|
/* Implementation pass (splay_tree_c is defined).  With a prefix in
   effect, include splay-tree.c at this point, while the renaming macros
   defined earlier in this header are still active.  */
# ifdef splay_tree_prefix
|
|
# include "splay-tree.c"
|
|
# endif
|
|
/* Consume the request: with splay_tree_c undefined again, a later
   inclusion of this header takes the declarations branch.  */
# undef splay_tree_c
|
|
#endif /* #ifndef splay_tree_c */
|
|
|
|
/* Cleanup: drop every renaming macro defined at the top of this header so
   nothing leaks into the code that follows the #include.  Note that
   splay_tree_prefix itself is undefined as well, so it has to be defined
   again before each further inclusion that is meant to use a prefix.  */
#ifdef splay_tree_prefix
|
|
# undef splay_tree_name_1
|
|
# undef splay_tree_name
|
|
# undef splay_tree_node_s
|
|
# undef splay_tree_s
|
|
# undef splay_tree_key_s
|
|
# undef splay_tree_node
|
|
# undef splay_tree
|
|
# undef splay_tree_key
|
|
# undef splay_compare
|
|
# undef splay_tree_lookup
|
|
# undef splay_tree_insert
|
|
# undef splay_tree_remove
|
|
# undef splay_tree_foreach
|
|
# undef splay_tree_foreach_lazy
|
|
# undef splay_tree_callback
|
|
# undef splay_tree_callback_stop
|
|
# undef splay_tree_prefix
|
|
#endif
|