mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-01-12 04:04:29 +08:00
lto: LTO cgraph support for late declare variant resolution [PR96680]
> I've tried to add the saving/restoring next to ipa refs saving/restoring, as > the declare variant alt stuff is kind of extension of those, unfortunately > following doesn't compile, because I need to also write or read a tree there > (ctx is a portion of DECL_ATTRIBUTES of the base function), but the ipa refs > write/read back functions don't have arguments that can be used for that. This patch adds the streaming out and in of the omp_declare_variant_alt hash table side data for the declare_variant_alt cgraph_nodes and, for LTO purposes, treats the declare_variant_alt nodes (which have no body) as if they contained a body that calls all the possible variants. After IPA, all calls to these magic declare_variant_alt nodes are replaced with a call to one of the variants, depending on which one has the highest score in the context. 2020-10-28 Jakub Jelinek <jakub@redhat.com> PR lto/96680 gcc/ * lto-streamer.h (omp_lto_output_declare_variant_alt, omp_lto_input_declare_variant_alt): Declare. * symtab.c (symtab_node::get_partitioning_class): Return SYMBOL_DUPLICATE for declare_variant_alt nodes. * passes.c (ipa_write_summaries): Add declare_variant_alt to partition. * lto-cgraph.c (output_refs): Call omp_lto_output_declare_variant_alt on declare_variant_alt nodes. (input_refs): Call omp_lto_input_declare_variant_alt on declare_variant_alt nodes. * lto-streamer-out.c (output_function): Don't call collect_block_tree_leafs if DECL_INITIAL is error_mark_node. (lto_output): Call output_function even for declare_variant_alt nodes. * omp-general.c (omp_lto_output_declare_variant_alt, omp_lto_input_declare_variant_alt): New functions. gcc/lto/ * lto-common.c (lto_fixup_prevailing_decls): Don't use LTO_NO_PREVAIL on TREE_LIST's TREE_PURPOSE. * lto-partition.c (lto_balanced_map): Treat declare_variant_alt nodes like definitions. libgomp/ * testsuite/libgomp.c/declare-variant-1.c: New test.
This commit is contained in:
parent
279a9ce9d5
commit
f165ef89c0
@ -767,6 +767,9 @@ output_refs (lto_symtab_encoder_t encoder)
|
||||
for (int i = 0; node->iterate_reference (i, ref); i++)
|
||||
lto_output_ref (ob, ref, encoder);
|
||||
}
|
||||
if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
|
||||
if (cnode->declare_variant_alt)
|
||||
omp_lto_output_declare_variant_alt (ob, cnode, encoder);
|
||||
}
|
||||
|
||||
streamer_write_uhwi_stream (ob->main_stream, 0);
|
||||
@ -1608,6 +1611,9 @@ input_refs (class lto_input_block *ib,
|
||||
input_ref (ib, node, nodes);
|
||||
count--;
|
||||
}
|
||||
if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
|
||||
if (cnode->declare_variant_alt)
|
||||
omp_lto_input_declare_variant_alt (ib, cnode, nodes);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2424,7 +2424,7 @@ output_function (struct cgraph_node *node)
|
||||
/* As we do not recurse into BLOCK_SUBBLOCKS but only BLOCK_SUPERCONTEXT
|
||||
collect block tree leafs and stream those. */
|
||||
auto_vec<tree> block_tree_leafs;
|
||||
if (DECL_INITIAL (function))
|
||||
if (DECL_INITIAL (function) && DECL_INITIAL (function) != error_mark_node)
|
||||
collect_block_tree_leafs (DECL_INITIAL (function), block_tree_leafs);
|
||||
streamer_write_uhwi (ob, block_tree_leafs.length ());
|
||||
for (unsigned i = 0; i < block_tree_leafs.length (); ++i)
|
||||
@ -2788,7 +2788,8 @@ lto_output (void)
|
||||
&& flag_incremental_link != INCREMENTAL_LINK_LTO)
|
||||
/* Thunks have no body but they may be synthetized
|
||||
at WPA time. */
|
||||
|| DECL_ARGUMENTS (cnode->decl)))
|
||||
|| DECL_ARGUMENTS (cnode->decl)
|
||||
|| cnode->declare_variant_alt))
|
||||
output_function (cnode);
|
||||
else if ((vnode = dyn_cast <varpool_node *> (snode))
|
||||
&& (DECL_INITIAL (vnode->decl) != error_mark_node
|
||||
|
@ -927,6 +927,12 @@ bool reachable_from_this_partition_p (struct cgraph_node *,
|
||||
lto_symtab_encoder_t compute_ltrans_boundary (lto_symtab_encoder_t encoder);
|
||||
void select_what_to_stream (void);
|
||||
|
||||
/* In omp-general.c. */
|
||||
void omp_lto_output_declare_variant_alt (lto_simple_output_block *,
|
||||
cgraph_node *, lto_symtab_encoder_t);
|
||||
void omp_lto_input_declare_variant_alt (lto_input_block *, cgraph_node *,
|
||||
vec<symtab_node *>);
|
||||
|
||||
/* In options-save.c. */
|
||||
void cl_target_option_stream_out (struct output_block *, struct bitpack_d *,
|
||||
struct cl_target_option *);
|
||||
|
@ -2592,7 +2592,6 @@ lto_fixup_prevailing_decls (tree t)
|
||||
case TREE_LIST:
|
||||
LTO_SET_PREVAIL (TREE_VALUE (t));
|
||||
LTO_SET_PREVAIL (TREE_PURPOSE (t));
|
||||
LTO_NO_PREVAIL (TREE_PURPOSE (t));
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
|
@ -593,7 +593,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||
|
||||
last_visited_node++;
|
||||
|
||||
gcc_assert (node->definition || node->weakref);
|
||||
gcc_assert (node->definition || node->weakref
|
||||
|| node->declare_variant_alt);
|
||||
|
||||
/* Compute boundary cost of callgraph edges. */
|
||||
for (edge = node->callees; edge; edge = edge->next_callee)
|
||||
@ -704,7 +705,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||
int index;
|
||||
|
||||
node = dyn_cast <cgraph_node *> (ref->referring);
|
||||
gcc_assert (node->definition);
|
||||
gcc_assert (node->definition || node->declare_variant_alt);
|
||||
index = lto_symtab_encoder_lookup (partition->encoder,
|
||||
node);
|
||||
if (index != LCC_NOT_FOUND
|
||||
|
@ -42,6 +42,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tree-pass.h"
|
||||
#include "omp-device-properties.h"
|
||||
#include "tree-iterator.h"
|
||||
#include "data-streamer.h"
|
||||
#include "streamer-hooks.h"
|
||||
|
||||
enum omp_requires omp_requires_mask;
|
||||
|
||||
@ -2337,6 +2339,125 @@ omp_resolve_declare_variant (tree base)
|
||||
? TREE_PURPOSE (TREE_VALUE (variant1)) : base);
|
||||
}
|
||||
|
||||
void
|
||||
omp_lto_output_declare_variant_alt (lto_simple_output_block *ob,
|
||||
cgraph_node *node,
|
||||
lto_symtab_encoder_t encoder)
|
||||
{
|
||||
gcc_assert (node->declare_variant_alt);
|
||||
|
||||
omp_declare_variant_base_entry entry;
|
||||
entry.base = NULL;
|
||||
entry.node = node;
|
||||
entry.variants = NULL;
|
||||
omp_declare_variant_base_entry *entryp
|
||||
= omp_declare_variant_alt->find_with_hash (&entry, DECL_UID (node->decl));
|
||||
gcc_assert (entryp);
|
||||
|
||||
int nbase = lto_symtab_encoder_lookup (encoder, entryp->base);
|
||||
gcc_assert (nbase != LCC_NOT_FOUND);
|
||||
streamer_write_hwi_stream (ob->main_stream, nbase);
|
||||
|
||||
streamer_write_hwi_stream (ob->main_stream, entryp->variants->length ());
|
||||
|
||||
unsigned int i;
|
||||
omp_declare_variant_entry *varentry;
|
||||
FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry)
|
||||
{
|
||||
int nvar = lto_symtab_encoder_lookup (encoder, varentry->variant);
|
||||
gcc_assert (nvar != LCC_NOT_FOUND);
|
||||
streamer_write_hwi_stream (ob->main_stream, nvar);
|
||||
|
||||
for (widest_int *w = &varentry->score; ;
|
||||
w = &varentry->score_in_declare_simd_clone)
|
||||
{
|
||||
unsigned len = w->get_len ();
|
||||
streamer_write_hwi_stream (ob->main_stream, len);
|
||||
const HOST_WIDE_INT *val = w->get_val ();
|
||||
for (unsigned j = 0; j < len; j++)
|
||||
streamer_write_hwi_stream (ob->main_stream, val[j]);
|
||||
if (w == &varentry->score_in_declare_simd_clone)
|
||||
break;
|
||||
}
|
||||
|
||||
HOST_WIDE_INT cnt = -1;
|
||||
HOST_WIDE_INT i = varentry->matches ? 1 : 0;
|
||||
for (tree attr = DECL_ATTRIBUTES (entryp->base->decl);
|
||||
attr; attr = TREE_CHAIN (attr), i += 2)
|
||||
{
|
||||
attr = lookup_attribute ("omp declare variant base", attr);
|
||||
if (attr == NULL_TREE)
|
||||
break;
|
||||
|
||||
if (varentry->ctx == TREE_VALUE (TREE_VALUE (attr)))
|
||||
{
|
||||
cnt = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
gcc_assert (cnt != -1);
|
||||
streamer_write_hwi_stream (ob->main_stream, cnt);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
omp_lto_input_declare_variant_alt (lto_input_block *ib, cgraph_node *node,
|
||||
vec<symtab_node *> nodes)
|
||||
{
|
||||
gcc_assert (node->declare_variant_alt);
|
||||
omp_declare_variant_base_entry *entryp
|
||||
= ggc_cleared_alloc<omp_declare_variant_base_entry> ();
|
||||
entryp->base = dyn_cast<cgraph_node *> (nodes[streamer_read_hwi (ib)]);
|
||||
entryp->node = node;
|
||||
unsigned int len = streamer_read_hwi (ib);
|
||||
vec_alloc (entryp->variants, len);
|
||||
|
||||
for (unsigned int i = 0; i < len; i++)
|
||||
{
|
||||
omp_declare_variant_entry varentry;
|
||||
varentry.variant
|
||||
= dyn_cast<cgraph_node *> (nodes[streamer_read_hwi (ib)]);
|
||||
for (widest_int *w = &varentry.score; ;
|
||||
w = &varentry.score_in_declare_simd_clone)
|
||||
{
|
||||
unsigned len2 = streamer_read_hwi (ib);
|
||||
HOST_WIDE_INT arr[WIDE_INT_MAX_ELTS];
|
||||
gcc_assert (len2 <= WIDE_INT_MAX_ELTS);
|
||||
for (unsigned int j = 0; j < len2; j++)
|
||||
arr[j] = streamer_read_hwi (ib);
|
||||
*w = widest_int::from_array (arr, len2, true);
|
||||
if (w == &varentry.score_in_declare_simd_clone)
|
||||
break;
|
||||
}
|
||||
|
||||
HOST_WIDE_INT cnt = streamer_read_hwi (ib);
|
||||
HOST_WIDE_INT j = 0;
|
||||
varentry.ctx = NULL_TREE;
|
||||
varentry.matches = (cnt & 1) ? true : false;
|
||||
cnt &= ~HOST_WIDE_INT_1;
|
||||
for (tree attr = DECL_ATTRIBUTES (entryp->base->decl);
|
||||
attr; attr = TREE_CHAIN (attr), j += 2)
|
||||
{
|
||||
attr = lookup_attribute ("omp declare variant base", attr);
|
||||
if (attr == NULL_TREE)
|
||||
break;
|
||||
|
||||
if (cnt == j)
|
||||
{
|
||||
varentry.ctx = TREE_VALUE (TREE_VALUE (attr));
|
||||
break;
|
||||
}
|
||||
}
|
||||
gcc_assert (varentry.ctx != NULL_TREE);
|
||||
entryp->variants->quick_push (varentry);
|
||||
}
|
||||
if (omp_declare_variant_alt == NULL)
|
||||
omp_declare_variant_alt
|
||||
= hash_table<omp_declare_variant_alt_hasher>::create_ggc (64);
|
||||
*omp_declare_variant_alt->find_slot_with_hash (entryp, DECL_UID (node->decl),
|
||||
INSERT) = entryp;
|
||||
}
|
||||
|
||||
/* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK
|
||||
macro on gomp-constants.h. We do not check for overflow. */
|
||||
|
@ -2731,7 +2731,8 @@ ipa_write_summaries (void)
|
||||
{
|
||||
struct cgraph_node *node = order[i];
|
||||
|
||||
if (node->definition && node->need_lto_streaming)
|
||||
if ((node->definition || node->declare_variant_alt)
|
||||
&& node->need_lto_streaming)
|
||||
{
|
||||
if (gimple_has_body_p (node->decl))
|
||||
lto_prepare_function_for_streaming (node);
|
||||
|
@ -2006,7 +2006,7 @@ symtab_node::get_partitioning_class (void)
|
||||
if (DECL_ABSTRACT_P (decl))
|
||||
return SYMBOL_EXTERNAL;
|
||||
|
||||
if (cnode && cnode->inlined_to)
|
||||
if (cnode && (cnode->inlined_to || cnode->declare_variant_alt))
|
||||
return SYMBOL_DUPLICATE;
|
||||
|
||||
/* Transparent aliases are always duplicated. */
|
||||
|
54
libgomp/testsuite/libgomp.c/declare-variant-1.c
Normal file
54
libgomp/testsuite/libgomp.c/declare-variant-1.c
Normal file
@ -0,0 +1,54 @@
|
||||
/* { dg-do link { target vect_simd_clones } } */
|
||||
/* { dg-require-effective-target lto } */
|
||||
/* { dg-require-effective-target fpic } */
|
||||
/* { dg-require-effective-target shared } */
|
||||
/* { dg-additional-options "-fdump-tree-gimple -fdump-tree-optimized -O2 -fPIC -shared -flto -flto-partition=one" } */
|
||||
/* { dg-additional-options "-mno-sse3" { target { i?86-*-* x86_64-*-* } } } */
|
||||
|
||||
/* Variant of f04 registered below for match (device={isa("avx512f")}).
   Returns A after passing it through an empty asm that also takes the
   constant 1 as an input -- presumably so this body stays distinguishable
   from the other variants.  */
int
f01 (int a)
{
  __asm__ __volatile__ ("" : "+g" (a) : "g" (1) : "memory");
  return a;
}
|
||||
|
||||
/* Variant of f04 registered below for
   match (implementation={vendor(score(3):gnu)},device={kind(cpu)}).
   Returns A after passing it through an empty asm that also takes the
   constant 2 as an input -- presumably so this body stays distinguishable
   from the other variants.  */
int
f02 (int a)
{
  __asm__ __volatile__ ("" : "+g" (a) : "g" (2) : "memory");
  return a;
}
|
||||
|
||||
/* Variant of f04 registered below for
   match (implementation={vendor(score(5):gnu)},device={kind(host)}).
   Returns A after passing it through an empty asm that also takes the
   constant 3 as an input -- presumably so this body stays distinguishable
   from the other variants.  */
int
f03 (int a)
{
  __asm__ __volatile__ ("" : "+g" (a) : "g" (3) : "memory");
  return a;
}
|
||||
|
||||
/* Base function.  The three directives below register f01, f02 and f03 as
   its variants; the trailing comment on each directive gives that
   variant's possible score.  The body itself returns A after an empty asm
   that takes the constant 4 as an input.  */
#pragma omp declare variant (f01) match (device={isa("avx512f")}) /* 4 or 8 */
#pragma omp declare variant (f02) match (implementation={vendor(score(3):gnu)},device={kind(cpu)}) /* (1 or 2) + 3 */
#pragma omp declare variant (f03) match (implementation={vendor(score(5):gnu)},device={kind(host)}) /* (1 or 2) + 5 */
int
f04 (int a)
{
  __asm__ __volatile__ ("" : "+g" (a) : "g" (4) : "memory");
  return a;
}
|
||||
|
||||
#pragma omp declare simd
int
test1 (int x)
{
  /* While gimplifying, the variant to call cannot be chosen yet, so both
     calls below still name the base function f04.  */
  /* { dg-final { scan-tree-dump-times "f04 \\\(x" 2 "gimple" } } */
  /* Once the simd clones exist, the original non-clone test1 and the
     sse2/avx/avx2 clones shall call f03 (score 6), while the avx512f
     clones shall call f01 (score 8); no call to f04 may remain.  */
  /* { dg-final { scan-ltrans-tree-dump-not "f04 \\\(x" "optimized" } } */
  /* { dg-final { scan-ltrans-tree-dump-times "f03 \\\(x" 14 "optimized" } } */
  /* { dg-final { scan-ltrans-tree-dump-times "f01 \\\(x" 4 "optimized" } } */
  int first = f04 (x);
  int second = f04 (x);
  return first + second;
}
|
Loading…
Reference in New Issue
Block a user