Use plain -fopenacc to enable OpenACC kernels processing

	gcc/
	* tree-parloops.c (create_parallel_loop, gen_parallel_loop)
	(parallelize_loops): In OpenACC kernels mode, set n_threads to
	zero.
	(pass_parallelize_loops::gate): In OpenACC kernels mode, gate on
	flag_openacc.
	* tree-ssa-loop.c (gate_oacc_kernels): Likewise.
	gcc/testsuite/
	* c-c++-common/goacc/kernels-counter-vars-function-scope.c: Adjust
	to -ftree-parallelize-loops/-fopenacc changes.
	* c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
	* c-c++-common/goacc/kernels-double-reduction.c: Likewise.
	* c-c++-common/goacc/kernels-loop-2.c: Likewise.
	* c-c++-common/goacc/kernels-loop-3.c: Likewise.
	* c-c++-common/goacc/kernels-loop-g.c: Likewise.
	* c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
	* c-c++-common/goacc/kernels-loop-n.c: Likewise.
	* c-c++-common/goacc/kernels-loop-nest.c: Likewise.
	* c-c++-common/goacc/kernels-loop.c: Likewise.
	* c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
	* c-c++-common/goacc/kernels-reduction.c: Likewise.
	* gfortran.dg/goacc/kernels-loop-inner.f95: Likewise.
	* gfortran.dg/goacc/kernels-loops-adjacent.f95: Likewise.
	libgomp/
	* oacc-parallel.c (GOACC_parallel_keyed): Initialize dims.
	* plugin/plugin-nvptx.c (nvptx_exec): Provide default values for
	dims.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: Adjust to
	-ftree-parallelize-loops/-fopenacc changes.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c:
	Likewise.

From-SVN: r233634
This commit is contained in:
Thomas Schwinge 2016-02-23 16:07:54 +01:00 committed by Thomas Schwinge
parent 43331dfbb8
commit f99c355797
36 changed files with 114 additions and 87 deletions

View File

@ -1,3 +1,12 @@
2016-02-23 Thomas Schwinge <thomas@codesourcery.com>
* tree-parloops.c (create_parallel_loop, gen_parallel_loop)
(parallelize_loops): In OpenACC kernels mode, set n_threads to
zero.
(pass_parallelize_loops::gate): In OpenACC kernels mode, gate on
flag_openacc.
* tree-ssa-loop.c (gate_oacc_kernels): Likewise.
2016-02-23 Richard Biener <rguenther@suse.de>
* mem-stats.h (struct mem_usage): Use PRIu64 for printing size_t.

View File

@ -1,3 +1,21 @@
2016-02-23 Thomas Schwinge <thomas@codesourcery.com>
* c-c++-common/goacc/kernels-counter-vars-function-scope.c: Adjust
to -ftree-parallelize-loops/-fopenacc changes.
* c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
* c-c++-common/goacc/kernels-double-reduction.c: Likewise.
* c-c++-common/goacc/kernels-loop-2.c: Likewise.
* c-c++-common/goacc/kernels-loop-3.c: Likewise.
* c-c++-common/goacc/kernels-loop-g.c: Likewise.
* c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
* c-c++-common/goacc/kernels-loop-n.c: Likewise.
* c-c++-common/goacc/kernels-loop-nest.c: Likewise.
* c-c++-common/goacc/kernels-loop.c: Likewise.
* c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
* c-c++-common/goacc/kernels-reduction.c: Likewise.
* gfortran.dg/goacc/kernels-loop-inner.f95: Likewise.
* gfortran.dg/goacc/kernels-loops-adjacent.f95: Likewise.
2016-02-23 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* gcc.target/i386/chkp-hidden-def.c: Require alias support.

View File

@ -1,5 +1,4 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -51,4 +50,4 @@ main (void)
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */

View File

@ -1,5 +1,4 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -34,4 +33,4 @@ foo (unsigned int n)
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */

View File

@ -1,5 +1,4 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -34,4 +33,4 @@ foo (void)
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */

View File

@ -1,5 +1,4 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -67,4 +66,4 @@ main (void)
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */

View File

@ -1,5 +1,4 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -45,5 +44,4 @@ main (void)
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */

View File

@ -1,6 +1,5 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-g" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -13,5 +12,4 @@
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */

View File

@ -1,5 +1,4 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -49,4 +48,4 @@ main (void)
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */

View File

@ -1,5 +1,4 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -52,5 +51,4 @@ foo (COUNTERTYPE n)
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */

View File

@ -1,5 +1,4 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -36,4 +35,4 @@ main (void)
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */

View File

@ -1,5 +1,4 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -52,5 +51,4 @@ main (void)
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */

View File

@ -1,5 +1,4 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -50,5 +49,4 @@ main (void)
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */

View File

@ -1,5 +1,4 @@
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
@ -32,5 +31,4 @@ foo (void)
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */

View File

@ -1,5 +1,4 @@
! { dg-additional-options "-O2" }
! { dg-additional-options "-ftree-parallelize-loops=32" }
program main
implicit none

View File

@ -1,5 +1,4 @@
! { dg-additional-options "-O2" }
! { dg-additional-options "-ftree-parallelize-loops=10" }
program main
implicit none

View File

@ -2016,7 +2016,8 @@ transform_to_exit_first_loop (struct loop *loop,
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
NEW_DATA is the variable that should be initialized from the argument
of LOOP_FN. N_THREADS is the requested number of threads. */
of LOOP_FN. N_THREADS is the requested number of threads, which can be 0 if
that number is to be determined later. */
static void
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
@ -2049,6 +2050,7 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
basic_block paral_bb = single_pred (bb);
gsi = gsi_last_bb (paral_bb);
gcc_checking_assert (n_threads != 0);
t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
OMP_CLAUSE_NUM_THREADS_EXPR (t)
= build_int_cst (integer_type_node, n_threads);
@ -2221,7 +2223,8 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
}
/* Generates code to execute the iterations of LOOP in N_THREADS
threads in parallel.
threads in parallel, which can be 0 if that number is to be determined
later.
NITER describes number of iterations of LOOP.
REDUCTION_LIST describes the reductions existent in the LOOP. */
@ -2318,6 +2321,7 @@ gen_parallel_loop (struct loop *loop,
else
m_p_thread=MIN_PER_THREAD;
gcc_checking_assert (n_threads != 0);
many_iterations_cond =
fold_build2 (GE_EXPR, boolean_type_node,
nit, build_int_cst (type, m_p_thread * n_threads));
@ -3177,7 +3181,7 @@ oacc_entry_exit_ok (struct loop *loop,
static bool
parallelize_loops (bool oacc_kernels_p)
{
unsigned n_threads = flag_tree_parallelize_loops;
unsigned n_threads;
bool changed = false;
struct loop *loop;
struct loop *skip_loop = NULL;
@ -3199,6 +3203,13 @@ parallelize_loops (bool oacc_kernels_p)
if (cfun->has_nonlocal_label)
return false;
/* For OpenACC kernels, n_threads will be determined later; otherwise, it's
the argument to -ftree-parallelize-loops. */
if (oacc_kernels_p)
n_threads = 0;
else
n_threads = flag_tree_parallelize_loops;
gcc_obstack_init (&parloop_obstack);
reduction_info_table_type reduction_list (10);
@ -3361,7 +3372,13 @@ public:
{}
/* opt_pass methods: */
virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
virtual bool gate (function *)
{
if (oacc_kernels_p)
return flag_openacc;
else
return flag_tree_parallelize_loops > 1;
}
virtual unsigned int execute (function *);
opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
void set_pass_param (unsigned int n, bool param)

View File

@ -148,7 +148,7 @@ make_pass_tree_loop (gcc::context *ctxt)
static bool
gate_oacc_kernels (function *fn)
{
if (flag_tree_parallelize_loops <= 1)
if (!flag_openacc)
return false;
tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
@ -230,10 +230,9 @@ public:
virtual bool gate (function *)
{
return (optimize
/* Don't bother doing anything if the program has errors. */
&& !seen_error ()
&& flag_openacc
&& flag_tree_parallelize_loops > 1);
/* Don't bother doing anything if the program has errors. */
&& !seen_error ());
}
}; // class pass_ipa_oacc

View File

@ -1,3 +1,35 @@
2016-02-23 Thomas Schwinge <thomas@codesourcery.com>
* oacc-parallel.c (GOACC_parallel_keyed): Initialize dims.
* plugin/plugin-nvptx.c (nvptx_exec): Provide default values for
dims.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: Adjust to
-ftree-parallelize-loops/-fopenacc changes.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c:
Likewise.
2016-02-22 Cesar Philippidis <cesar@codesourcery.com>
* testsuite/libgomp.oacc-c-c++-common/vprop.c: New test.

View File

@ -103,6 +103,10 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
return;
}
/* Default: let the runtime choose. */
for (i = 0; i != GOMP_DIM_MAX; i++)
dims[i] = 0;
va_start (ap, kinds);
/* TODO: This will need amending when device_type is implemented. */
while ((tag = va_arg (ap, unsigned)) != 0)

View File

@ -894,9 +894,21 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
/* Initialize the launch dimensions. Typically this is constant,
provided by the device compiler, but we must permit runtime
values. */
for (i = 0; i != 3; i++)
if (targ_fn->launch->dim[i])
dims[i] = targ_fn->launch->dim[i];
int seen_zero = 0;
for (i = 0; i != GOMP_DIM_MAX; i++)
{
if (targ_fn->launch->dim[i])
dims[i] = targ_fn->launch->dim[i];
if (!dims[i])
seen_zero = 1;
}
if (seen_zero)
{
for (i = 0; i != GOMP_DIM_MAX; i++)
if (!dims[i])
dims[i] = /* TODO */ 32;
}
/* This reserves a chunk of a pre-allocated page of memory mapped on both
the host and the device. HP is a host pointer to the new chunk, and DP is

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N (1024 * 512)

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N (1024 * 512)

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N 32

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N 32

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N 32

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N 32

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N 32

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N 32

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N 100

View File

@ -1,5 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-g" } */
#include "kernels-loop.c"

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N ((1024 * 512) + 1)

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N ((1024 * 512) + 1)

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N 1000

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define N (1024 * 512)

View File

@ -1,6 +1,3 @@
/* { dg-do run } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
#include <stdlib.h>
#define n 10000