mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-01-12 01:34:58 +08:00
f165ef89c0
> I've tried to add the saving/restoring next to ipa refs saving/restoring, as > the declare variant alt stuff is kind of an extension of those, unfortunately > the following doesn't compile, because I need to also write or read a tree there > (ctx is a portion of DECL_ATTRIBUTES of the base function), but the ipa refs > write/read back functions don't have arguments that can be used for that. This patch adds the streaming out and in of the omp_declare_variant_alt hash table on-the-side data for the declare_variant_alt cgraph_nodes and treats for LTO purposes the declare_variant_alt nodes (which have no body) as if they contained a body that calls all the possible variants. After IPA, all calls to these magic declare_variant_alt functions are replaced with a call to one of the variants, depending on which one has the highest score in the context. 2020-10-28 Jakub Jelinek <jakub@redhat.com> PR lto/96680 gcc/ * lto-streamer.h (omp_lto_output_declare_variant_alt, omp_lto_input_declare_variant_alt): Declare variant. * symtab.c (symtab_node::get_partitioning_class): Return SYMBOL_DUPLICATE for declare_variant_alt nodes. * passes.c (ipa_write_summaries): Add declare_variant_alt to partition. * lto-cgraph.c (output_refs): Call omp_lto_output_declare_variant_alt on declare_variant_alt nodes. (input_refs): Call omp_lto_input_declare_variant_alt on declare_variant_alt nodes. * lto-streamer-out.c (output_function): Don't call collect_block_tree_leafs if DECL_INITIAL is error_mark_node. (lto_output): Call output_function even for declare_variant_alt nodes. * omp-general.c (omp_lto_output_declare_variant_alt, omp_lto_input_declare_variant_alt): New functions. gcc/lto/ * lto-common.c (lto_fixup_prevailing_decls): Don't use LTO_NO_PREVAIL on TREE_LIST's TREE_PURPOSE. * lto-partition.c (lto_balanced_map): Treat declare_variant_alt nodes like definitions. libgomp/ * testsuite/libgomp.c/declare-variant-1.c: New test.
55 lines
1.7 KiB
C
55 lines
1.7 KiB
C
/* { dg-do link { target vect_simd_clones } } */
|
|
/* { dg-require-effective-target lto } */
|
|
/* { dg-require-effective-target fpic } */
|
|
/* { dg-require-effective-target shared } */
|
|
/* { dg-additional-options "-fdump-tree-gimple -fdump-tree-optimized -O2 -fPIC -shared -flto -flto-partition=one" } */
|
|
/* { dg-additional-options "-mno-sse3" { target { i?86-*-* x86_64-*-* } } } */
|
|
|
|
/* Return A after passing it through an empty asm statement.  The asm has
   an empty template, takes A as a read-write "+g" operand, the constant 1
   as a "g" input and clobbers "memory".  This function is named as the
   isa("avx512f") variant of f04 by a declare variant pragma further down.
   NOTE(review): presumably the asm (with its per-function constant) keeps
   this body opaque to the optimizers and distinct from f02/f03/f04 --
   confirm.  */
int
|
|
f01 (int a)
|
|
{
|
|
asm volatile ("" : "+g" (a) : "g" (1) : "memory");
|
|
return a;
|
|
}
|
|
|
|
/* Return A after passing it through an empty asm statement.  The asm has
   an empty template, takes A as a read-write "+g" operand, the constant 2
   as a "g" input and clobbers "memory".  This function is named as the
   vendor(score(3):gnu) / kind(cpu) variant of f04 by a declare variant
   pragma further down.
   NOTE(review): presumably the asm (with its per-function constant) keeps
   this body opaque to the optimizers and distinct from f01/f03/f04 --
   confirm.  */
int
|
|
f02 (int a)
|
|
{
|
|
asm volatile ("" : "+g" (a) : "g" (2) : "memory");
|
|
return a;
|
|
}
|
|
|
|
/* Return A after passing it through an empty asm statement.  The asm has
   an empty template, takes A as a read-write "+g" operand, the constant 3
   as a "g" input and clobbers "memory".  This function is named as the
   vendor(score(5):gnu) / kind(host) variant of f04 by a declare variant
   pragma further down.
   NOTE(review): presumably the asm (with its per-function constant) keeps
   this body opaque to the optimizers and distinct from f01/f02/f04 --
   confirm.  */
int
|
|
f03 (int a)
|
|
{
|
|
asm volatile ("" : "+g" (a) : "g" (3) : "memory");
|
|
return a;
|
|
}
|
|
|
|
/* Base function for the three declare variant pragmas below, which name
   f01, f02 and f03 as its variants.  The trailing comment on each pragma
   records the score the test author expects for that variant.  The body
   mirrors the variants: an empty asm with A as a read-write "+g" operand,
   the constant 4 as a "g" input and a "memory" clobber, then A is
   returned.  */
#pragma omp declare variant (f01) match (device={isa("avx512f")}) /* 4 or 8 */
|
|
#pragma omp declare variant (f02) match (implementation={vendor(score(3):gnu)},device={kind(cpu)}) /* (1 or 2) + 3 */
|
|
#pragma omp declare variant (f03) match (implementation={vendor(score(5):gnu)},device={kind(host)}) /* (1 or 2) + 5 */
|
|
int
|
|
f04 (int a)
|
|
{
|
|
asm volatile ("" : "+g" (a) : "g" (4) : "memory");
|
|
return a;
|
|
}
|
|
|
|
/* Function under test: calls f04 twice with X and returns the sum of the
   two results.  It is marked with omp declare simd, and the scan
   directives embedded in the body check which callee each call site ends
   up with: two "f04 (x" calls in the gimple dump, then in the LTRANS
   optimized dump no "f04 (x" calls, 14 "f03 (x" calls and 4 "f01 (x"
   calls.  The two calls are written as separate statements; the expected
   counts depend on that shape, so keep the body as is when editing.  */
#pragma omp declare simd
|
|
int
|
|
test1 (int x)
|
|
{
|
|
/* At gimplification time, we can't decide yet which function to call. */
|
|
/* { dg-final { scan-tree-dump-times "f04 \\\(x" 2 "gimple" } } */
|
|
/* After simd clones are created, the original non-clone test1 shall
|
|
call f03 (score 6), the sse2/avx/avx2 clones too, but avx512f clones
|
|
shall call f01 with score 8. */
|
|
/* { dg-final { scan-ltrans-tree-dump-not "f04 \\\(x" "optimized" } } */
|
|
/* { dg-final { scan-ltrans-tree-dump-times "f03 \\\(x" 14 "optimized" } } */
|
|
/* { dg-final { scan-ltrans-tree-dump-times "f01 \\\(x" 4 "optimized" } } */
|
|
int a = f04 (x);
|
|
int b = f04 (x);
|
|
return a + b;
|
|
}
|