Update OpenACC test cases

gcc/testsuite/
	* c-c++-common/goacc/combined-directives.c: Clean up dg-*
	directives.
	* c-c++-common/goacc/loop-clauses.c: Likewise.
	* g++.dg/goacc/template.C: Likewise.
	* gfortran.dg/goacc/combined-directives.f90: Likewise.
	* gfortran.dg/goacc/loop-1.f95: Likewise.
	* gfortran.dg/goacc/loop-5.f95: Likewise.
	* gfortran.dg/goacc/loop-6.f95: Likewise.
	* gfortran.dg/goacc/loop-tree-1.f90: Likewise.
	* c-c++-common/goacc-gomp/nesting-1.c: Update.
	* c-c++-common/goacc-gomp/nesting-fail-1.c: Likewise.
	* c-c++-common/goacc/clauses-fail.c: Likewise.
	* c-c++-common/goacc/parallel-1.c: Likewise.
	* c-c++-common/goacc/reduction-1.c: Likewise.
	* c-c++-common/goacc/reduction-2.c: Likewise.
	* c-c++-common/goacc/reduction-3.c: Likewise.
	* c-c++-common/goacc/reduction-4.c: Likewise.
	* c-c++-common/goacc/routine-3.c: Likewise.
	* c-c++-common/goacc/routine-4.c: Likewise.
	* c-c++-common/goacc/routine-5.c: Likewise.
	* c-c++-common/goacc/tile.c: Likewise.
	* g++.dg/goacc/template.C: Likewise.
	* gfortran.dg/goacc/combined-directives.f90: Likewise.
	* c-c++-common/goacc/nesting-1.c: Move dg-error test cases into...
	* c-c++-common/goacc/nesting-fail-1.c: ... this file.  Update.
	* c-c++-common/goacc/kernels-1.c: Update.  Incorporate...
	* c-c++-common/goacc/kernels-empty.c: ... this file, and...
	* c-c++-common/goacc/kernels-eternal.c: ... this file, and...
	* c-c++-common/goacc/kernels-noreturn.c: ... this file.
	* c-c++-common/goacc/host_data-1.c: New file.  Incorporate...
	* c-c++-common/goacc/use_device-1.c: ... this file.
	* c-c++-common/goacc/host_data-2.c: New file.  Incorporate...
	* c-c++-common/goacc/host_data-5.c: ... this file, and...
	* c-c++-common/goacc/host_data-6.c: ... this file.
	* c-c++-common/goacc/loop-2-kernels.c: New file.
	* c-c++-common/goacc/loop-2-parallel.c: Likewise.
	* c-c++-common/goacc/loop-3.c: Likewise.
	* g++.dg/goacc/reference.C: Likewise.
	* g++.dg/goacc/routine-1.C: Likewise.
	* g++.dg/goacc/routine-2.C: Likewise.
	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/clauses-1.c: Update.
	* testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/if-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/vector-loop.c: Likewise.
	* testsuite/libgomp.oacc-fortran/asyncwait-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/asyncwait-2.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/asyncwait-3.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/declare-1.f90: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Likewise.
	XFAIL.
	* testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c: Update.
	Incorporate...
	* testsuite/libgomp.oacc-c-c++-common/firstprivate-2.c: ... this
	file.
	* testsuite/libgomp.oacc-c++/template-reduction.C: New file.
	* testsuite/libgomp.oacc-c-c++-common/gang-static-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-clauses.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/private-variables.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-4.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Likewise.
	* testsuite/libgomp.oacc-fortran/clauses-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/default-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/firstprivate-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/gang-static-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/if-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/implicit-firstprivate-ref.f90:
	Likewise.
	* testsuite/libgomp.oacc-fortran/pr68813.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/private-variables.f90: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-1.c: Merge this
	file...
	* testsuite/libgomp.oacc-c-c++-common/parallel-1.c: ..., and this
	file into...
	* testsuite/libgomp.oacc-c-c++-common/data-clauses.h: ... this new
	file.  Update.
	* testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels.c: New
	file.
	* testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-2.c: Rename to...
	* testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c:
	... this new file.  Update.
	* testsuite/libgomp.oacc-c-c++-common/parallel-2.c: Rename to...
	* testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c:
	... this new file.  Update.
	* testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: New
	file.  Incorporate...
	* testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c: ... this
	file, and...
	* testsuite/libgomp.oacc-c-c++-common/worker-single-4.c: ... this
	file, and...
	* testsuite/libgomp.oacc-c-c++-common/worker-single-6.c: ... this
	file.
	* testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Remove file.

Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
Co-Authored-By: Chung-Lin Tang <cltang@codesourcery.com>
Co-Authored-By: James Norris <jnorris@codesourcery.com>
Co-Authored-By: Julian Brown <julian@codesourcery.com>
Co-Authored-By: Nathan Sidwell <nathan@codesourcery.com>
Co-Authored-By: Tom de Vries <tom@codesourcery.com>

From-SVN: r234575
This commit is contained in:
Thomas Schwinge 2016-03-30 17:08:47 +02:00 committed by Thomas Schwinge
parent ba9c755f25
commit 2620c80db0
77 changed files with 7131 additions and 1109 deletions

View File

@ -1,3 +1,52 @@
2016-03-30 Thomas Schwinge <thomas@codesourcery.com>
Julian Brown <julian@codesourcery.com>
Chung-Lin Tang <cltang@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
James Norris <jnorris@codesourcery.com>
Tom de Vries <tom@codesourcery.com>
Nathan Sidwell <nathan@codesourcery.com>
* c-c++-common/goacc/combined-directives.c: Clean up dg-*
directives.
* c-c++-common/goacc/loop-clauses.c: Likewise.
* g++.dg/goacc/template.C: Likewise.
* gfortran.dg/goacc/combined-directives.f90: Likewise.
* gfortran.dg/goacc/loop-1.f95: Likewise.
* gfortran.dg/goacc/loop-5.f95: Likewise.
* gfortran.dg/goacc/loop-6.f95: Likewise.
* gfortran.dg/goacc/loop-tree-1.f90: Likewise.
* c-c++-common/goacc-gomp/nesting-1.c: Update.
* c-c++-common/goacc-gomp/nesting-fail-1.c: Likewise.
* c-c++-common/goacc/clauses-fail.c: Likewise.
* c-c++-common/goacc/parallel-1.c: Likewise.
* c-c++-common/goacc/reduction-1.c: Likewise.
* c-c++-common/goacc/reduction-2.c: Likewise.
* c-c++-common/goacc/reduction-3.c: Likewise.
* c-c++-common/goacc/reduction-4.c: Likewise.
* c-c++-common/goacc/routine-3.c: Likewise.
* c-c++-common/goacc/routine-4.c: Likewise.
* c-c++-common/goacc/routine-5.c: Likewise.
* c-c++-common/goacc/tile.c: Likewise.
* g++.dg/goacc/template.C: Likewise.
* gfortran.dg/goacc/combined-directives.f90: Likewise.
* c-c++-common/goacc/nesting-1.c: Move dg-error test cases into...
* c-c++-common/goacc/nesting-fail-1.c: ... this file. Update.
* c-c++-common/goacc/kernels-1.c: Update. Incorporate...
* c-c++-common/goacc/kernels-empty.c: ... this file, and...
* c-c++-common/goacc/kernels-eternal.c: ... this file, and...
* c-c++-common/goacc/kernels-noreturn.c: ... this file.
* c-c++-common/goacc/host_data-1.c: New file. Incorporate...
* c-c++-common/goacc/use_device-1.c: ... this file.
* c-c++-common/goacc/host_data-2.c: New file. Incorporate...
* c-c++-common/goacc/host_data-5.c: ... this file, and...
* c-c++-common/goacc/host_data-6.c: ... this file.
* c-c++-common/goacc/loop-2-kernels.c: New file.
* c-c++-common/goacc/loop-2-parallel.c: Likewise.
* c-c++-common/goacc/loop-3.c: Likewise.
* g++.dg/goacc/reference.C: Likewise.
* g++.dg/goacc/routine-1.C: Likewise.
* g++.dg/goacc/routine-2.C: Likewise.
2016-03-30 Richard Biener <rguenther@suse.de>
PR middle-end/70450

View File

@ -20,12 +20,12 @@ f_acc_kernels (void)
}
}
#pragma acc routine vector
void
f_acc_loop (void)
{
int i;
#pragma acc parallel
#pragma acc loop
for (i = 0; i < 2; ++i)
{

View File

@ -1,4 +1,5 @@
extern int i;
#pragma acc declare create(i)
void
f_omp (void)
@ -14,6 +15,9 @@ f_omp (void)
#pragma acc update host(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */
#pragma acc enter data copyin(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */
#pragma acc exit data delete(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */
#pragma acc loop /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
for (i = 0; i < 2; ++i)
;
}
#pragma omp for
@ -358,85 +362,77 @@ f_acc_data (void)
}
}
#pragma acc routine
void
f_acc_loop (void)
{
#pragma acc parallel
#pragma acc loop
for (i = 0; i < 2; ++i)
{
#pragma omp parallel /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
#pragma omp parallel /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
;
}
#pragma acc parallel
#pragma acc loop
for (i = 0; i < 2; ++i)
{
#pragma omp for /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
#pragma omp for /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
for (i = 0; i < 3; i++)
;
}
#pragma acc parallel
#pragma acc loop
for (i = 0; i < 2; ++i)
{
#pragma omp sections /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
#pragma omp sections /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
{
;
}
}
#pragma acc parallel
#pragma acc loop
for (i = 0; i < 2; ++i)
{
#pragma omp single /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
#pragma omp single /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
;
}
#pragma acc parallel
#pragma acc loop
for (i = 0; i < 2; ++i)
{
#pragma omp task /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
#pragma omp task /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
;
}
#pragma acc parallel
#pragma acc loop
for (i = 0; i < 2; ++i)
{
#pragma omp master /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
#pragma omp master /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
;
}
#pragma acc parallel
#pragma acc loop
for (i = 0; i < 2; ++i)
{
#pragma omp critical /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
#pragma omp critical /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
;
}
#pragma acc parallel
#pragma acc loop
for (i = 0; i < 2; ++i)
{
#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
;
}
#pragma acc parallel
#pragma acc loop
for (i = 0; i < 2; ++i)
{
#pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
#pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
;
#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
;
#pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
#pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
}
}

View File

@ -1,3 +1,5 @@
/* Miscellaneous tests where clause parsing is expected to fail. */
void
f (void)
{
@ -17,3 +19,13 @@ f (void)
for (i = 0; i < 2; ++i)
;
}
/* An array section (b[10:20]) inside a firstprivate clause is expected to
   be rejected while the clause is being parsed; the diagnostic is matched
   on the pragma line.  */
void
f2 (void)
{
int a, b[100];
#pragma acc parallel firstprivate (b[10:20]) /* { dg-error "expected ... before ... token" } */
;
}

View File

@ -1,10 +1,7 @@
// { dg-do compile }
// { dg-options "-fopenacc -fdump-tree-gimple" }
// { dg-additional-options "-fdump-tree-gimple" }
// This error is temporary. Remove when support is added for these clauses
// in the middle end. Also remove the comments from the reduction test
// Remove the comments from the reduction test
// after the FE learns that reduction variables may appear in data clauses too.
// { dg-prune-output "sorry, unimplemented" }
void
test ()

View File

@ -1,4 +1,14 @@
/* { dg-do compile } */
/* Test valid use of host_data directive. */
int v1[3][3];
/* Valid use: a file-scope multi-dimensional array named in use_device.
   No diagnostic is expected here.  */
void
f (void)
{
#pragma acc host_data use_device(v1)
;
}
void bar (float *, float *);

View File

@ -0,0 +1,78 @@
/* Test invalid use of host_data directive. */
int v0;
#pragma acc host_data use_device(v0) /* { dg-error "expected declaration specifiers before" } */
void
f (void)
{
int v2 = 3;
#pragma acc host_data copy(v2) /* { dg-error ".copy. is not valid for ..pragma acc host_data." } */
;
#pragma acc host_data use_device(v2)
;
/* { dg-error ".use_device_ptr. variable is neither a pointer nor an array" "" { target c } 14 } */
/* { dg-error ".use_device_ptr. variable is neither a pointer, nor an arraynor reference to pointer or array" "" { target c++ } 14 } */
#pragma acc host_data use_device(v0)
;
/* { dg-error ".use_device_ptr. variable is neither a pointer nor an array" "" { target c } 19 } */
/* { dg-error ".use_device_ptr. variable is neither a pointer, nor an arraynor reference to pointer or array" "" { target c++ } 19 } */
}
/* use_device with a subscripted element (x[4]) is expected to fail to
   parse.  The host_data construct sits between an enter data copyin and
   an exit data delete of the same array.  */
void
f2 (void)
{
int x[100];
#pragma acc enter data copyin (x)
/* Specifying an array index is not valid for host_data/use_device. */
#pragma acc host_data use_device (x[4]) /* { dg-error "expected '\\\)' before '\\\[' token" } */
;
#pragma acc exit data delete (x)
}
/* x is listed in use_device of a host_data region and then named in an
   explicit present clause of a parallel construct nested inside that
   region.  The diagnostic is expected on the parallel pragma line.  */
void
f3 (void)
{
int x[100];
#pragma acc data copyin (x[25:50])
{
int *xp;
#pragma acc host_data use_device (x)
{
/* This use of the present clause is undefined behavior for OpenACC. */
#pragma acc parallel present (x) copyout (xp) /* { dg-error "variable .x. declared in enclosing .host_data. region" } */
{
xp = x;
}
}
}
}
/* Same nesting as the explicit-clause case, but x appears in no clause of
   the parallel construct: it is only referenced in the region body, and
   the diagnostic is expected on the line of that reference.  */
void
f4 (void)
{
int x[50];
#pragma acc data copyin (x[10:30])
{
int *xp;
#pragma acc host_data use_device (x)
{
/* Here 'x' being implicitly firstprivate for the parallel region
conflicts with it being declared as use_device in the enclosing
host_data region. */
#pragma acc parallel copyout (xp)
{
xp = x; /* { dg-error "variable .x. declared in enclosing .host_data. region" } */
}
}
}
}

View File

@ -1,23 +0,0 @@
/* { dg-do compile } */
#define N 1024
int main (int argc, char* argv[])
{
int x[N];
#pragma acc data copyin (x[0:N])
{
int *xp;
#pragma acc host_data use_device (x)
{
/* This use of the present clause is undefined behavior for OpenACC. */
#pragma acc parallel present (x) copyout (xp) /* { dg-error "variable 'x' declared in enclosing 'host_data' region" } */
{
xp = x;
}
}
}
return 0;
}

View File

@ -1,25 +0,0 @@
/* { dg-do compile } */
#define N 1024
int main (int argc, char* argv[])
{
int x[N];
#pragma acc data copyin (x[0:N])
{
int *xp;
#pragma acc host_data use_device (x)
{
/* Here 'x' being implicitly firstprivate for the parallel region
conflicts with it being declared as use_device in the enclosing
host_data region. */
#pragma acc parallel copyout (xp)
{
xp = x; /* { dg-error "variable 'x' declared in enclosing 'host_data' region" } */
}
}
}
return 0;
}

View File

@ -1,6 +1,45 @@
void
foo (void)
/* A kernels construct whose body is just the empty statement.  */
int
kernels_empty (void)
{
#pragma acc kernels
;
return 0;
}
/* A kernels construct whose body is an unconditional infinite loop, so
   control never leaves the region.  */
int
kernels_eternal (void)
{
#pragma acc kernels
{
while (1)
;
}
return 0;
}
/* A kernels construct whose body is a single call to __builtin_abort,
   written without enclosing braces.  */
int
kernels_noreturn (void)
{
#pragma acc kernels
__builtin_abort ();
return 0;
}
float b[10][15][10];
/* An acc loop inside a kernels construct whose iteration variable is a
   pointer rather than an integer: it walks from &b[0][0][0] up to (but not
   including) &b[0][0][10] in the file-scope array b.  */
void
kernels_loop_ptr_it (void)
{
float *i;
#pragma acc kernels
{
#pragma acc loop
for (i = &b[0][0][0]; i < &b[0][0][10]; i++)
;
}
}

View File

@ -1,6 +0,0 @@
void
foo (void)
{
#pragma acc kernels
;
}

View File

@ -1,11 +0,0 @@
int
main (void)
{
#pragma acc kernels
{
while (1)
;
}
return 0;
}

View File

@ -1,12 +0,0 @@
int
main (void)
{
#pragma acc kernels
{
__builtin_abort ();
}
return 0;
}

View File

@ -0,0 +1,189 @@
/* Loop partitioning clauses on loops associated with a kernels construct.

   First half: separate 'acc loop' directives inside one kernels region.
   gang, worker and vector are used bare and with arguments (gang(5),
   gang(num:5), gang(static:...), worker(5), worker(num:5), vector(5),
   vector(length:5)) with no diagnostic expected.  Expected errors are:
   - a nested loop repeating the level of its containing loop
     ("inner loop uses same");
   - seq combined with gang, worker, vector or auto ("'seq' overrides");
   - auto combined with gang, worker or vector ("'auto' conflicts").

   Second half: the same clause combinations on the combined
   'acc kernels loop' directive.  There the expected error is matched on
   the pragma line for C and on the following 'for' line for C++.  */
void K(void)
{
int i, j;
#pragma acc kernels
{
#pragma acc loop auto
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang(5)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang(num:5)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang(static:5)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang(static:*)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang
for (i = 0; i < 10; i++)
{
#pragma acc loop vector
for (j = 0; j < 10; j++)
{ }
#pragma acc loop worker
for (j = 0; j < 10; j++)
{ }
#pragma acc loop gang // { dg-error "inner loop uses same" }
for (j = 0; j < 10; j++)
{ }
}
#pragma acc loop seq gang // { dg-error "'seq' overrides" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker(5)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker(num:5)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker
for (i = 0; i < 10; i++)
{
#pragma acc loop vector
for (j = 0; j < 10; j++)
{ }
#pragma acc loop worker // { dg-error "inner loop uses same" }
for (j = 0; j < 10; j++)
{ }
#pragma acc loop gang
for (j = 0; j < 10; j++)
{ }
}
#pragma acc loop seq worker // { dg-error "'seq' overrides" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang worker
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector(5)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector(length:5)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector
for (i = 0; i < 10; i++)
{
#pragma acc loop vector // { dg-error "inner loop uses same" }
for (j = 1; j < 10; j++)
{ }
#pragma acc loop worker
for (j = 1; j < 10; j++)
{ }
#pragma acc loop gang
for (j = 1; j < 10; j++)
{ }
}
#pragma acc loop seq vector // { dg-error "'seq' overrides" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang vector
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker vector
for (i = 0; i < 10; i++)
{ }
#pragma acc loop auto
for (i = 0; i < 10; i++)
{ }
#pragma acc loop seq auto // { dg-error "'seq' overrides" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang auto // { dg-error "'auto' conflicts" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker auto // { dg-error "'auto' conflicts" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector auto // { dg-error "'auto' conflicts" }
for (i = 0; i < 10; i++)
{ }
}
/* Combined 'kernels loop' directives: C is expected to report on the
   pragma line, C++ on the 'for' line.  */
#pragma acc kernels loop auto
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop gang
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop gang(5)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop gang(num:5)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop gang(static:5)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop gang(static:*)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop worker
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop worker(5)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop worker(num:5)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop seq worker // { dg-error "'seq' overrides" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
{ }
#pragma acc kernels loop gang worker
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop vector
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop vector(5)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop vector(length:5)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop seq vector // { dg-error "'seq' overrides" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
{ }
#pragma acc kernels loop gang vector
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop worker vector
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop auto
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop seq auto // { dg-error "'seq' overrides" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
{ }
#pragma acc kernels loop gang auto // { dg-error "'auto' conflicts" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
{ }
#pragma acc kernels loop worker auto // { dg-error "'auto' conflicts" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
{ }
#pragma acc kernels loop vector auto // { dg-error "'auto' conflicts" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
{ }
}

View File

@ -0,0 +1,162 @@
/* Loop partitioning clauses on loops associated with a parallel construct.

   First half: separate 'acc loop' directives inside one parallel region.
   gang, worker and vector appear without a numeric argument (only
   gang(static:...) takes one).  Expected diagnostics are:
   - "containing loop" notes on the outer loop of each nested group;
   - "inner loop uses same" when a nested loop repeats its parent's level;
   - "incorrectly nested" for gang inside worker, and for worker or gang
     inside vector;
   - "'seq' overrides" and "'auto' conflicts" for the clause clashes.

   Second half: the combined 'acc parallel loop' directive, where the
   expected error is matched on the pragma line for C and on the following
   'for' line for C++.  */
void P(void)
{
int i, j;
#pragma acc parallel
{
#pragma acc loop auto
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang(static:5)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang(static:*)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang // { dg-message "containing loop" }
for (i = 0; i < 10; i++)
{
#pragma acc loop vector
for (j = 1; j < 10; j++)
{ }
#pragma acc loop worker
for (j = 1; j < 10; j++)
{ }
#pragma acc loop gang // { dg-error "inner loop uses same" }
for (j = 1; j < 10; j++)
{ }
}
#pragma acc loop seq gang // { dg-error "'seq' overrides" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker // { dg-message "containing loop" 2 }
for (i = 0; i < 10; i++)
{
#pragma acc loop vector
for (j = 1; j < 10; j++)
{ }
#pragma acc loop worker // { dg-error "inner loop uses same" }
for (j = 1; j < 10; j++)
{ }
#pragma acc loop gang // { dg-error "incorrectly nested" }
for (j = 1; j < 10; j++)
{ }
}
#pragma acc loop seq worker // { dg-error "'seq' overrides" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang worker
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector
for (i = 0; i < 10; i++)
{ }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector // { dg-message "containing loop" 3 }
for (i = 0; i < 10; i++)
{
#pragma acc loop vector // { dg-error "inner loop uses same" }
for (j = 1; j < 10; j++)
{ }
#pragma acc loop worker // { dg-error "incorrectly nested" }
for (j = 1; j < 10; j++)
{ }
#pragma acc loop gang // { dg-error "incorrectly nested" }
for (j = 1; j < 10; j++)
{ }
}
#pragma acc loop seq vector // { dg-error "'seq' overrides" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang vector
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker vector
for (i = 0; i < 10; i++)
{ }
#pragma acc loop auto
for (i = 0; i < 10; i++)
{ }
#pragma acc loop seq auto // { dg-error "'seq' overrides" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang auto // { dg-error "'auto' conflicts" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker auto // { dg-error "'auto' conflicts" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector auto // { dg-error "'auto' conflicts" }
for (i = 0; i < 10; i++)
{ }
}
/* Combined 'parallel loop' directives: C is expected to report on the
   pragma line, C++ on the 'for' line.  */
#pragma acc parallel loop auto
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop gang
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop gang(static:5)
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop gang(static:*)
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop seq gang // { dg-error "'seq' overrides" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
{ }
#pragma acc parallel loop worker
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop seq worker // { dg-error "'seq' overrides" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
{ }
#pragma acc parallel loop gang worker
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop vector
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop seq vector // { dg-error "'seq' overrides" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
{ }
#pragma acc parallel loop gang vector
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop worker vector
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop auto
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop seq auto // { dg-error "'seq' overrides" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
{ }
#pragma acc parallel loop gang auto // { dg-error "'auto' conflicts" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
{ }
#pragma acc parallel loop worker auto // { dg-error "'auto' conflicts" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
{ }
#pragma acc parallel loop vector auto // { dg-error "'auto' conflicts" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
{ }
}

View File

@ -0,0 +1,58 @@
/* Inside a parallel construct, every numeric argument form on a separate
   'acc loop' directive -- gang(5), gang(num:5), worker(5), worker(num:5),
   vector(5), vector(length:5) -- is expected to be rejected with
   "argument not permitted" on the pragma line.  */
void par1 (void)
{
int i, j;
#pragma acc parallel
{
#pragma acc loop gang(5) // { dg-error "argument not permitted" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang(num:5) // { dg-error "argument not permitted" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker(5) // { dg-error "argument not permitted" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker(num:5) // { dg-error "argument not permitted" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector(5) // { dg-error "argument not permitted" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector(length:5) // { dg-error "argument not permitted" }
for (i = 0; i < 10; i++)
{ }
}
}
/* The same six numeric argument forms on the combined 'acc parallel loop'
   directive.  "argument not permitted" is matched on the pragma line for C
   and on the following 'for' line for C++.  */
void p2 (void)
{
int i, j;
#pragma acc parallel loop gang(5) // { dg-error "argument not permitted" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
{ }
#pragma acc parallel loop gang(num:5) // { dg-error "argument not permitted" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
{ }
#pragma acc parallel loop worker(5) // { dg-error "argument not permitted" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
{ }
#pragma acc parallel loop worker(num:5) // { dg-error "argument not permitted" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
{ }
#pragma acc parallel loop vector(5) // { dg-error "argument not permitted" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
{ }
#pragma acc parallel loop vector(length:5) // { dg-error "argument not permitted" "" { target c } }
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
{ }
}

View File

@ -1,7 +1,3 @@
/* { dg-do compile } */
/* { dg-prune-output "sorry, unimplemented" } */
int
main ()
{

View File

@ -58,10 +58,6 @@ f_acc_data (void)
#pragma acc exit data delete(i)
#pragma acc loop /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
for (i = 0; i < 2; ++i)
;
#pragma acc data
{
#pragma acc parallel
@ -92,10 +88,6 @@ f_acc_data (void)
#pragma acc enter data copyin(i)
#pragma acc exit data delete(i)
#pragma acc loop /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
for (i = 0; i < 2; ++i)
;
}
}
}

View File

@ -38,6 +38,25 @@ f_acc_kernels (void)
}
}
/* An 'acc loop' directive placed directly inside a data construct, and
   again inside a second data construct nested in the first, with no
   enclosing compute construct in this function.  Both are expected to be
   diagnosed on the pragma line.  */
void
f_acc_data (void)
{
unsigned int i;
#pragma acc data
{
#pragma acc loop /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
for (i = 0; i < 2; ++i)
;
#pragma acc data
{
#pragma acc loop /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
for (i = 0; i < 2; ++i)
;
}
}
}
#pragma acc routine
void
f_acc_routine (void)
@ -45,3 +64,13 @@ f_acc_routine (void)
#pragma acc parallel /* { dg-error "OpenACC region inside of OpenACC routine, nested parallelism not supported yet" } */
;
}
/* An orphaned 'acc loop' carrying gang and reduction clauses, in a
   function with no enclosing OpenACC construct.  The expected diagnostic
   is matched on the pragma line.  */
void
f (void)
{
int i, v = 0;
#pragma acc loop gang reduction (+:v) /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
for (i = 0; i < 10; i++)
v++;
}

View File

@ -1,6 +1,38 @@
void
foo (void)
/* A parallel construct whose body is just the empty statement.  */
int
parallel_empty (void)
{
#pragma acc parallel
;
return 0;
}
/* A parallel construct whose body is an unconditional infinite loop, so
   control never leaves the region.  */
int
parallel_eternal (void)
{
#pragma acc parallel
{
while (1)
;
}
return 0;
}
/* A parallel construct whose body is a single call to __builtin_abort,
   written without enclosing braces.  */
int
parallel_noreturn (void)
{
#pragma acc parallel
__builtin_abort ();
return 0;
}
/* A parallel construct with a firstprivate clause naming both a scalar (a)
   and a whole array (b); no diagnostic is expected.

   Returns 0, like the other parallel_* functions in this file.  The
   explicit return matters: the function is declared int, and flowing off
   the end of a non-void function is undefined behavior in C++ and is
   reported by -Wreturn-type, which would show up as an excess diagnostic
   in this c-c++-common test.  */
int
parallel_clauses (void)
{
  int a, b[100];

#pragma acc parallel firstprivate (a, b)
  ;

  return 0;
}

View File

@ -1,70 +1,65 @@
/* { dg-require-effective-target alloca } */
/* Integer reductions. */
#define vl 32
#define n 1000
int
main(void)
{
const int n = 1000;
int i;
int result, array[n];
int lresult;
/* '+' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (+:result)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (+:result)
for (i = 0; i < n; i++)
result += array[i];
/* '*' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (*:result)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (*:result)
for (i = 0; i < n; i++)
result *= array[i];
// result = 0;
// vresult = 0;
//
// /* 'max' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result > array[i] ? result : array[i];
//
// /* 'min' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result < array[i] ? result : array[i];
/* 'max' reductions. */
#pragma acc parallel
#pragma acc loop gang worker vector reduction (max:result)
for (i = 0; i < n; i++)
result = result > array[i] ? result : array[i];
/* 'min' reductions. */
#pragma acc parallel
#pragma acc loop gang worker vector reduction (min:result)
for (i = 0; i < n; i++)
result = result < array[i] ? result : array[i];
/* '&' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (&:result)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (&:result)
for (i = 0; i < n; i++)
result &= array[i];
/* '|' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (|:result)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (|:result)
for (i = 0; i < n; i++)
result |= array[i];
/* '^' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (^:result)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (^:result)
for (i = 0; i < n; i++)
result ^= array[i];
/* '&&' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (&&:lresult)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (&&:lresult)
for (i = 0; i < n; i++)
lresult = lresult && (result > array[i]);
/* '||' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (||:lresult)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (||:lresult)
for (i = 0; i < n; i++)
lresult = lresult || (result > array[i]);

View File

@ -1,49 +1,47 @@
/* { dg-require-effective-target alloca } */
/* float reductions. */
#define vl 32
#define n 1000
int
main(void)
{
const int n = 1000;
int i;
float result, array[n];
int lresult;
/* '+' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (+:result)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (+:result)
for (i = 0; i < n; i++)
result += array[i];
/* '*' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (*:result)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (*:result)
for (i = 0; i < n; i++)
result *= array[i];
// /* 'max' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result > array[i] ? result : array[i];
//
// /* 'min' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result < array[i] ? result : array[i];
/* 'max' reductions. */
#pragma acc parallel
#pragma acc loop gang worker vector reduction (max:result)
for (i = 0; i < n; i++)
result = result > array[i] ? result : array[i];
/* 'min' reductions. */
#pragma acc parallel
#pragma acc loop gang worker vector reduction (min:result)
for (i = 0; i < n; i++)
result = result < array[i] ? result : array[i];
/* '&&' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (&&:lresult)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (&&:lresult)
for (i = 0; i < n; i++)
lresult = lresult && (result > array[i]);
/* '||' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (||:lresult)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (||:lresult)
for (i = 0; i < n; i++)
lresult = lresult || (result > array[i]);

View File

@ -1,49 +1,47 @@
/* { dg-require-effective-target alloca } */
/* double reductions. */
#define vl 32
#define n 1000
int
main(void)
{
const int n = 1000;
int i;
double result, array[n];
int lresult;
/* '+' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (+:result)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (+:result)
for (i = 0; i < n; i++)
result += array[i];
/* '*' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (*:result)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (*:result)
for (i = 0; i < n; i++)
result *= array[i];
// /* 'max' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result > array[i] ? result : array[i];
//
// /* 'min' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result < array[i] ? result : array[i];
/* 'max' reductions. */
#pragma acc parallel
#pragma acc loop gang worker vector reduction (max:result)
for (i = 0; i < n; i++)
result = result > array[i] ? result : array[i];
/* 'min' reductions. */
#pragma acc parallel
#pragma acc loop gang worker vector reduction (min:result)
for (i = 0; i < n; i++)
result = result < array[i] ? result : array[i];
/* '&&' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (&&:lresult)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (&&:lresult)
for (i = 0; i < n; i++)
lresult = lresult && (result > array[i]);
/* '||' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (||:lresult)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (||:lresult)
for (i = 0; i < n; i++)
lresult = lresult || (result > array[i]);

View File

@ -1,51 +1,35 @@
/* { dg-require-effective-target alloca } */
/* complex reductions. */
#define vl 32
#define n 1000
int
main(void)
{
const int n = 1000;
int i;
__complex__ double result, array[n];
int lresult;
/* '+' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (+:result)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (+:result)
for (i = 0; i < n; i++)
result += array[i];
/* Needs support for complex multiplication. */
// /* '*' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (*:result)
// for (i = 0; i < n; i++)
// result *= array[i];
//
// /* 'max' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result > array[i] ? result : array[i];
//
// /* 'min' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result < array[i] ? result : array[i];
/* '*' reductions. */
#pragma acc parallel
#pragma acc loop gang worker vector reduction (*:result)
for (i = 0; i < n; i++)
result *= array[i];
/* '&&' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (&&:lresult)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (&&:lresult)
for (i = 0; i < n; i++)
lresult = lresult && (__real__(result) > __real__(array[i]));
/* '||' reductions. */
#pragma acc parallel vector_length (vl)
#pragma acc loop reduction (||:lresult)
#pragma acc parallel
#pragma acc loop gang worker vector reduction (||:lresult)
for (i = 0; i < n; i++)
lresult = lresult || (__real__(result) > __real__(array[i]));

View File

@ -1,52 +1,118 @@
/* Test invalid calls to routines. */
#pragma acc routine gang
void gang (void) /* { dg-message "declared here" 3 } */
int
gang () /* { dg-message "declared here" 3 } */
{
#pragma acc loop gang worker vector
for (int i = 0; i < 10; i++)
{
}
return 1;
}
#pragma acc routine worker
void worker (void) /* { dg-message "declared here" 2 } */
int
worker () /* { dg-message "declared here" 2 } */
{
#pragma acc loop worker vector
for (int i = 0; i < 10; i++)
{
}
return 1;
}
#pragma acc routine vector
void vector (void) /* { dg-message "declared here" 1 } */
int
vector () /* { dg-message "declared here" } */
{
#pragma acc loop vector
for (int i = 0; i < 10; i++)
{
}
return 1;
}
#pragma acc routine seq
void seq (void)
int
seq ()
{
return 1;
}
int main ()
int
main ()
{
#pragma acc parallel num_gangs (32) num_workers (32) vector_length (32)
int red = 0;
#pragma acc parallel copy (red)
{
#pragma acc loop gang /* { dg-message "loop here" 1 } */
/* Independent/seq loop tests. */
#pragma acc loop reduction (+:red) // { dg-warning "insufficient partitioning" }
for (int i = 0; i < 10; i++)
{
gang (); /* { dg-error "routine call uses same" } */
worker ();
vector ();
seq ();
}
#pragma acc loop worker /* { dg-message "loop here" 2 } */
red += gang ();
#pragma acc loop reduction (+:red)
for (int i = 0; i < 10; i++)
{
gang (); /* { dg-error "routine call uses same" } */
worker (); /* { dg-error "routine call uses same" } */
vector ();
seq ();
}
#pragma acc loop vector /* { dg-message "loop here" 3 } */
red += worker ();
#pragma acc loop reduction (+:red)
for (int i = 0; i < 10; i++)
{
gang (); /* { dg-error "routine call uses same" } */
worker (); /* { dg-error "routine call uses same" } */
vector (); /* { dg-error "routine call uses same" } */
seq ();
}
red += vector ();
/* Gang routine tests. */
#pragma acc loop gang reduction (+:red) /* { dg-message "containing loop" } */
for (int i = 0; i < 10; i++)
red += gang (); // { dg-error "routine call uses same" }
#pragma acc loop worker reduction (+:red) /* { dg-message "containing loop" } */
for (int i = 0; i < 10; i++)
red += gang (); // { dg-error "routine call uses same" }
#pragma acc loop vector reduction (+:red) /* { dg-message "containing loop" } */
for (int i = 0; i < 10; i++)
red += gang (); // { dg-error "routine call uses same" }
/* Worker routine tests. */
#pragma acc loop gang reduction (+:red)
for (int i = 0; i < 10; i++)
red += worker ();
#pragma acc loop worker reduction (+:red) /* { dg-message "containing loop" } */
for (int i = 0; i < 10; i++)
red += worker (); // { dg-error "routine call uses same" }
#pragma acc loop vector reduction (+:red) /* { dg-message "containing loop" } */
for (int i = 0; i < 10; i++)
red += worker (); // { dg-error "routine call uses same" }
/* Vector routine tests. */
#pragma acc loop gang reduction (+:red)
for (int i = 0; i < 10; i++)
red += vector ();
#pragma acc loop worker reduction (+:red)
for (int i = 0; i < 10; i++)
red += vector ();
#pragma acc loop vector reduction (+:red) /* { dg-message "containing loop" } */
for (int i = 0; i < 10; i++)
red += vector (); // { dg-error "routine call uses same" }
/* Seq routine tests. */
#pragma acc loop gang reduction (+:red)
for (int i = 0; i < 10; i++)
red += seq ();
#pragma acc loop worker reduction (+:red)
for (int i = 0; i < 10; i++)
red += seq ();
#pragma acc loop vector reduction (+:red)
for (int i = 0; i < 10; i++)
red += seq ();
}
return 0;

View File

@ -1,3 +1,4 @@
/* Test invalid intra-routine parallelism. */
void gang (void);
void worker (void);
@ -14,6 +15,24 @@ void seq (void)
worker (); /* { dg-error "routine call uses" } */
vector (); /* { dg-error "routine call uses" } */
seq ();
int red;
#pragma acc loop reduction (+:red) // { dg-warning "insufficient partitioning" }
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop gang reduction (+:red) // { dg-error "disallowed by containing routine" }
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop worker reduction (+:red) // { dg-error "disallowed by containing routine" }
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop vector reduction (+:red) // { dg-error "disallowed by containing routine" }
for (int i = 0; i < 10; i++)
red ++;
}
void vector (void) /* { dg-message "declared here" 1 } */
@ -22,6 +41,24 @@ void vector (void) /* { dg-message "declared here" 1 } */
worker (); /* { dg-error "routine call uses" } */
vector ();
seq ();
int red;
#pragma acc loop reduction (+:red)
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop gang reduction (+:red) // { dg-error "disallowed by containing routine" }
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop worker reduction (+:red) // { dg-error "disallowed by containing routine" }
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop vector reduction (+:red)
for (int i = 0; i < 10; i++)
red ++;
}
void worker (void) /* { dg-message "declared here" 2 } */
@ -30,6 +67,24 @@ void worker (void) /* { dg-message "declared here" 2 } */
worker ();
vector ();
seq ();
int red;
#pragma acc loop reduction (+:red)
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop gang reduction (+:red) // { dg-error "disallowed by containing routine" }
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop worker reduction (+:red)
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop vector reduction (+:red)
for (int i = 0; i < 10; i++)
red ++;
}
void gang (void) /* { dg-message "declared here" 3 } */
@ -38,4 +93,22 @@ void gang (void) /* { dg-message "declared here" 3 } */
worker ();
vector ();
seq ();
int red;
#pragma acc loop reduction (+:red)
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop gang reduction (+:red)
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop worker reduction (+:red)
for (int i = 0; i < 10; i++)
red ++;
#pragma acc loop vector reduction (+:red)
for (int i = 0; i < 10; i++)
red ++;
}

View File

@ -46,6 +46,21 @@ using namespace g;
#pragma acc routine (c) /* { dg-error "does not refer to" } */
void Bar ();
void Foo ()
{
Bar ();
}
#pragma acc routine (Bar) // { dg-error "must be applied before use" }
#pragma acc routine (Foo) gang // { dg-error "must be applied before definition" }
#pragma acc routine (Baz) // { dg-error "not been declared" }
int vb1; /* { dg-error "directive for use" } */
extern int vb2; /* { dg-error "directive for use" } */
static int vb3; /* { dg-error "directive for use" } */

View File

@ -1,5 +1,3 @@
/* { dg-do compile } */
int
main ()
{
@ -71,3 +69,259 @@ main ()
return 0;
}
/* Compile-only checks of the 'tile' clause on 'loop' directives nested
   inside a 'parallel' region.  Lines carrying a DejaGnu directive are the
   ones expected to be diagnosed; every other directive is expected to be
   accepted silently.  */
void par (void)
{
int i, j;
#pragma acc parallel
{
/* Missing or empty argument list: both forms are expected to be rejected.  */
#pragma acc loop tile // { dg-error "expected" }
for (i = 0; i < 10; i++)
{ }
#pragma acc loop tile() // { dg-error "expected" }
for (i = 0; i < 10; i++)
{ }
/* Single-argument forms: a constant and the '*' placeholder.  */
#pragma acc loop tile(1)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop tile(2)
for (i = 0; i < 10; i++)
{
for (j = 1; j < 10; j++)
{ }
}
/* A negative tile size only draws a warning.  */
#pragma acc loop tile(-2) // { dg-warning "'tile' value must be positive" }
for (i = 1; i < 10; i++)
{ }
/* A variable as the tile size; no diagnostic is expected here.  */
#pragma acc loop tile(i)
for (i = 1; i < 10; i++)
{ }
/* Multi-argument forms over a two-deep loop nest.  */
#pragma acc loop tile(2, 2, 1)
for (i = 1; i < 3; i++)
{
for (j = 4; j < 6; j++)
{ }
}
#pragma acc loop tile(2, 2)
for (i = 1; i < 5; i+=2)
{
for (j = i + 1; j < 7; j+=i)
{ }
}
/* 'tile' combined with each partitioning clause and with pairs of them.  */
#pragma acc loop vector tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop worker tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector gang tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop vector worker tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc loop gang worker tile(*)
for (i = 0; i < 10; i++)
{ }
}
}
/* Compile-only checks of the 'tile' clause on the combined
   'parallel loop' directive.  Lines carrying a DejaGnu directive are the
   ones expected to be diagnosed; every other directive is expected to be
   accepted silently.  */
void p3 (void)
{
int i, j;
/* Missing or empty argument list: both forms are expected to be rejected.  */
#pragma acc parallel loop tile // { dg-error "expected" }
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop tile() // { dg-error "expected" }
for (i = 0; i < 10; i++)
{ }
/* Constant and '*' arguments, alone and mixed.  */
#pragma acc parallel loop tile(1)
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop tile(*, 1)
for (i = 0; i < 10; i++)
{
for (j = 1; j < 10; j++)
{ }
}
/* A negative tile size only draws a warning.  */
#pragma acc parallel loop tile(-2) // { dg-warning "'tile' value must be positive" }
for (i = 1; i < 10; i++)
{ }
/* A variable as the tile size; no diagnostic is expected here.  */
#pragma acc parallel loop tile(i)
for (i = 1; i < 10; i++)
{ }
/* Multi-argument forms over a two-deep loop nest.  */
#pragma acc parallel loop tile(2, 2, 1)
for (i = 1; i < 3; i++)
{
for (j = 4; j < 6; j++)
{ }
}
#pragma acc parallel loop tile(2, 2)
for (i = 1; i < 5; i+=2)
{
for (j = i + 1; j < 7; j++)
{ }
}
/* 'tile' combined with each partitioning clause and with pairs of them.  */
#pragma acc parallel loop vector tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop worker tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop gang tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop vector gang tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop vector worker tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc parallel loop gang worker tile(*)
for (i = 0; i < 10; i++)
{ }
}
/* Compile-only checks of the 'tile' clause on 'loop' directives nested
   inside a 'kernels' region.  Lines carrying a DejaGnu directive are the
   ones expected to be diagnosed; every other directive is expected to be
   accepted silently.

   Every inner loop of a two-deep nest is controlled by J, matching the
   sibling functions that exercise the same clause forms.  */
void
kern (void)
{
  int i, j;

#pragma acc kernels
  {
    /* Missing or empty argument list: both forms are expected to be
       rejected.  */
#pragma acc loop tile // { dg-error "expected" }
    for (i = 0; i < 10; i++)
      { }
#pragma acc loop tile() // { dg-error "expected" }
    for (i = 0; i < 10; i++)
      { }

    /* Constant arguments, including constant expressions.  */
#pragma acc loop tile(1)
    for (i = 0; i < 10; i++)
      { }
#pragma acc loop tile(2)
    for (i = 0; i < 10; i++)
      { }
#pragma acc loop tile(6-2)
    for (i = 0; i < 10; i++)
      { }
#pragma acc loop tile(6+2)
    for (i = 0; i < 10; i++)
      { }

    /* The '*' placeholder, alone and mixed with a constant.  */
#pragma acc loop tile(*)
    for (i = 0; i < 10; i++)
      { }
#pragma acc loop tile(*, 1)
    for (i = 0; i < 10; i++)
      {
	/* The inner loop steps its own variable J, not the outer I.  */
	for (j = 0; j < 10; j++)
	  { }
      }

    /* A negative tile size only draws a warning.  */
#pragma acc loop tile(-2) // { dg-warning "'tile' value must be positive" }
    for (i = 0; i < 10; i++)
      { }

    /* A variable as the tile size; no diagnostic is expected here.  */
#pragma acc loop tile(i)
    for (i = 0; i < 10; i++)
      { }

    /* Multi-argument forms over a two-deep loop nest; the inner loop is
       controlled by J so that it does not clobber the outer variable.  */
#pragma acc loop tile(2, 2, 1)
    for (i = 2; i < 4; i++)
      for (j = 4; j < 6; j++)
	{ }
#pragma acc loop tile(2, 2)
    for (i = 1; i < 5; i+=2)
      for (j = i+1; j < 7; j++)
	{ }

    /* 'tile' combined with each partitioning clause and with pairs of
       them.  */
#pragma acc loop vector tile(*)
    for (i = 0; i < 10; i++)
      { }
#pragma acc loop worker tile(*)
    for (i = 0; i < 10; i++)
      { }
#pragma acc loop gang tile(*)
    for (i = 0; i < 10; i++)
      { }
#pragma acc loop vector gang tile(*)
    for (i = 0; i < 10; i++)
      { }
#pragma acc loop vector worker tile(*)
    for (i = 0; i < 10; i++)
      { }
#pragma acc loop gang worker tile(*)
    for (i = 0; i < 10; i++)
      { }
  }
}
/* Compile-only checks of the 'tile' clause on the combined
   'kernels loop' directive.  Lines carrying a DejaGnu directive are the
   ones expected to be diagnosed; every other directive is expected to be
   accepted silently.  */
void k3 (void)
{
int i, j;
/* Missing or empty argument list: both forms are expected to be rejected.  */
#pragma acc kernels loop tile // { dg-error "expected" }
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop tile() // { dg-error "expected" }
for (i = 0; i < 10; i++)
{ }
/* Constant and '*' arguments, alone and mixed.  */
#pragma acc kernels loop tile(1)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop tile(*, 1)
for (i = 0; i < 10; i++)
{
for (j = 1; j < 10; j++)
{ }
}
/* A negative tile size only draws a warning.  */
#pragma acc kernels loop tile(-2) // { dg-warning "'tile' value must be positive" }
for (i = 1; i < 10; i++)
{ }
/* A variable as the tile size; no diagnostic is expected here.  */
#pragma acc kernels loop tile(i)
for (i = 1; i < 10; i++)
{ }
/* Multi-argument forms over a two-deep loop nest.  */
#pragma acc kernels loop tile(2, 2, 1)
for (i = 1; i < 3; i++)
{
for (j = 4; j < 6; j++)
{ }
}
#pragma acc kernels loop tile(2, 2)
for (i = 1; i < 5; i++)
{
for (j = i + 1; j < 7; j += i)
{ }
}
/* 'tile' combined with each partitioning clause and with pairs of them.  */
#pragma acc kernels loop vector tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop worker tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop gang tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop vector gang tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop vector worker tile(*)
for (i = 0; i < 10; i++)
{ }
#pragma acc kernels loop gang worker tile(*)
for (i = 0; i < 10; i++)
{ }
}

View File

@ -0,0 +1,39 @@
// Name a reference parameter directly in a 'copy' clause of a 'kernels'
// region and store to it inside the region.
//
// REF is overwritten with 10.  The function returns the value REF holds
// afterwards, so control never flows off the end of a value-returning
// function.
int
test1 (int &ref)
{
#pragma acc kernels copy (ref)
  {
    ref = 10;
  }

  return ref;
}
// Read a reference parameter inside 'kernels' and 'parallel' regions
// without naming it in any data clause; only the local B is listed, in
// 'copyout'.
//
// On return REF has been advanced by 10.  The function returns that new
// value, so control never flows off the end of a value-returning function.
int
test2 (int &ref)
{
  int b;

#pragma acc kernels copyout (b)
  {
    b = ref + 10;
  }

#pragma acc parallel copyout (b)
  {
    b = ref + 10;
  }

  // Publish the computed value through the reference as well.
  ref = b;

  return b;
}
// Name a variable and a reference bound to it in the same 'copy' clause,
// store 5 through the reference inside the region, and return the
// variable's value.
int
main()
{
int a = 0;
// ref_a aliases a, so both names in the clause below denote one object.
int &ref_a = a;
#pragma acc parallel copy (a, ref_a)
{
ref_a = 5;
}
return a;
}

View File

@ -0,0 +1,13 @@
/* Test valid use of the routine directive. */
// Three ways of attaching a routine directive to a namespace member:
// by unqualified name from inside the namespace (foo1), by the unnamed
// form placed directly before a definition (foo3), and by qualified name
// from outside the namespace (foo2).
namespace N
{
extern void foo1();
extern void foo2();
// Named form, used inside the namespace that declares foo1.
#pragma acc routine (foo1)
// Unnamed form: applies to the function definition that follows.
#pragma acc routine
void foo3()
{
}
}
// Named form with a qualified name, used outside the namespace.
#pragma acc routine (N::foo2)

View File

@ -0,0 +1,42 @@
/* Test invalid use of the routine directive. */
template <typename T>
extern T one_d();
#pragma acc routine (one_d) /* { dg-error "names a set of overloads" } */
// Function template with a body; returns 1 converted to T.  The named
// routine directive that follows it in this file is expected to be
// rejected as naming a set of overloads.
template <typename T>
T
one()
{
return 1;
}
#pragma acc routine (one) /* { dg-error "names a set of overloads" } */
int incr (int);
float incr (float);
int inc;
#pragma acc routine (incr) /* { dg-error "names a set of overloads" } */
#pragma acc routine (increment) /* { dg-error "has not been declared" } */
#pragma acc routine (inc) /* { dg-error "does not refer to a function" } */
#pragma acc routine (+) /* { dg-error "expected unqualified-id before '.' token" } */
int sum (int, int);
namespace foo {
#pragma acc routine (sum)
int sub (int, int);
}
#pragma acc routine (foo::sub)
/* It's strange to apply a routine directive to a subset of overloaded
functions, but that is permissible in OpenACC 2.x. */
int decr (int a);
#pragma acc routine
float decr (float a);

View File

@ -1,8 +1,3 @@
// This error is temporary. Remove when support is added for these clauses
// in the middle end. Also remove the comments from the reduction test
// after the FE learns that reduction variables may appear in data clauses too.
// { dg-prune-output "sorry, unimplemented" }
#pragma acc routine
template <typename T> T
accDouble(int val)
@ -20,55 +15,62 @@ oacc_parallel_copy (T a)
double z = 4;
#pragma acc parallel num_gangs (a) num_workers (a) vector_length (a) default (none) copyout (b) copyin (a)
{
#pragma acc loop gang worker vector
for (int i = 0; i < 1; i++)
b = a;
}
#pragma acc parallel num_gangs (a) copy (w, x, y, z)
{
w = accDouble<char>(w);
x = accDouble<int>(x);
y = accDouble<float>(y);
z = accDouble<double>(z);
}
#pragma acc loop
for (int i = 0; i < 1; i++)
{
w = accDouble<char>(w);
x = accDouble<int>(x);
y = accDouble<float>(y);
z = accDouble<double>(z);
}
#pragma acc parallel num_gangs (a) if (1)
{
#pragma acc loop independent collapse (2) gang
for (int i = 0; i < a; i++)
for (int j = 0; j < 5; j++)
b = a;
#pragma acc loop auto tile (a, 3)
for (int i = 0; i < a; i++)
for (int j = 0; j < 5; j++)
b = a;
for (int i = 0; i < a; i++)
for (int j = 0; j < 5; j++)
b = a;
#pragma acc loop seq
for (int i = 0; i < a; i++)
b = a;
for (int i = 0; i < a; i++)
b = a;
}
T c;
#pragma acc parallel num_workers (10)
{
#pragma acc loop worker
for (int i = 0; i < 1; i++)
{
#pragma acc atomic capture
c = b++;
c = b++;
#pragma atomic update
c++;
c++;
#pragma acc atomic read
b = a;
b = a;
#pragma acc atomic write
b = a;
}
b = a;
}
//#pragma acc parallel reduction (+:c)
// {
// c = 1;
// }
#pragma acc parallel reduction (+:c)
c = 1;
#pragma acc data if (1) copy (b)
{
#pragma acc parallel
#pragma acc parallel
{
b = a;
}
@ -76,9 +78,9 @@ oacc_parallel_copy (T a)
#pragma acc enter data copyin (b)
#pragma acc parallel present (b)
{
b = a;
}
{
b = a;
}
#pragma acc update host (b)
#pragma acc update self (b)
@ -109,11 +111,9 @@ oacc_kernels_copy (T a)
#pragma acc kernels copyout (b) copyin (a)
b = a;
//#pragma acc kernels loop reduction (+:c)
// for (int i = 0; i < 10; i++)
// {
// c = 1;
// }
#pragma acc kernels loop reduction (+:c)
for (int i = 0; i < 10; i++)
c = 1;
#pragma acc data if (1) copy (b)
{
@ -125,9 +125,10 @@ oacc_kernels_copy (T a)
#pragma acc enter data copyin (b)
#pragma acc kernels present (b)
{
b = a;
}
{
b = a;
}
return b;
}

View File

@ -1,17 +1,10 @@
! Exercise combined OpenACC directives.
! { dg-do compile }
! { dg-options "-fopenacc -fdump-tree-gimple" }
! This error is temporary. Remove when support is added for these clauses
! in the middle end.
! { dg-prune-output "sorry, unimplemented" }
! Update the reduction tests.
! { dg-additional-options "-fdump-tree-gimple" }
subroutine test
implicit none
integer a(100), i, j, z
integer a(100), i, j, y, z
! PARALLEL
@ -73,10 +66,10 @@ subroutine test
end do
!$acc end parallel loop
! !$acc parallel loop reduction (+:z) copy (z)
! do i = 1, 100
! end do
! !$acc end parallel loop
!$acc parallel loop reduction (+:y) copy (y)
do i = 1, 100
end do
!$acc end parallel loop
! KERNELS
@ -138,10 +131,10 @@ subroutine test
end do
!$acc end kernels loop
! !$acc kernels loop reduction (+:z) copy (z)
! do i = 1, 100
! end do
! !$acc end kernels loop
!$acc kernels loop reduction (+:y) copy (y)
do i = 1, 100
end do
!$acc end kernels loop
end subroutine test
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. collapse.2." 2 "gimple" } }
@ -153,3 +146,5 @@ end subroutine test
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "acc loop private.i. reduction..:y." 2 "gimple" } }

View File

@ -1,5 +1,3 @@
! { dg-do compile }
! { dg-additional-options "-fmax-errors=100" }
module test
implicit none
contains
@ -29,14 +27,18 @@ subroutine test1
i = i + 1
end do
!$acc loop
do 300 d = 1, 30, 6 ! { dg-error "integer" }
do 300 d = 1, 30, 6
i = d
300 a(i) = 1
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
!$acc loop
do d = 1, 30, 5 ! { dg-error "integer" }
do d = 1, 30, 5
i = d
a(i) = 2
end do
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
!$acc loop
do i = 1, 30
if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@ -144,8 +146,10 @@ subroutine test1
end do
!$acc parallel loop collapse(2)
do i = 1, 3
do r = 4, 6 ! { dg-error "integer" }
do r = 4, 6
end do
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
end do
! Both seq and independent are not allowed
@ -167,4 +171,3 @@ subroutine test1
end subroutine test1
end module test
! { dg-prune-output "Deleted" }

View File

@ -1,9 +1,3 @@
! { dg-do compile }
! { dg-additional-options "-fmax-errors=100" }
! { dg-prune-output "sorry, unimplemented" }
! { dg-prune-output "Error: work-sharing region" }
program test
implicit none
integer :: i, j

View File

@ -1,11 +1,3 @@
! { dg-do compile }
! { dg-additional-options "-fmax-errors=100" }
! This error is temporary. Remove when support is added for these clauses
! in the middle end.
! { dg-prune-output "sorry, unimplemented" }
! { dg-prune-output "Error: work-sharing region" }
program test
implicit none
integer :: i, j

View File

@ -1,13 +1,7 @@
! { dg-do compile }
! { dg-additional-options "-fdump-tree-original -std=f2008" }
! test for tree-dump-original and spaces-commas
! This error is temporary. Remove when support is added for these clauses
! in the middle end.
! { dg-prune-output "sorry, unimplemented" }
! { dg-prune-output "Error: work-sharing region" }
program test
implicit none
integer :: i, j, k, m, sum

View File

@ -1,3 +1,71 @@
2016-03-30 Thomas Schwinge <thomas@codesourcery.com>
James Norris <jnorris@codesourcery.com>
Nathan Sidwell <nathan@codesourcery.com>
Julian Brown <julian@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
Chung-Lin Tang <cltang@codesourcery.com>
Tom de Vries <tom@codesourcery.com>
* testsuite/libgomp.oacc-c-c++-common/clauses-1.c: Update.
* testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/if-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/vector-loop.c: Likewise.
* testsuite/libgomp.oacc-fortran/asyncwait-1.f90: Likewise.
* testsuite/libgomp.oacc-fortran/asyncwait-2.f90: Likewise.
* testsuite/libgomp.oacc-fortran/asyncwait-3.f90: Likewise.
* testsuite/libgomp.oacc-fortran/declare-1.f90: Likewise.
* testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Likewise.
XFAIL.
* testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c: Update.
Incorporate...
* testsuite/libgomp.oacc-c-c++-common/firstprivate-2.c: ... this
file.
* testsuite/libgomp.oacc-c++/template-reduction.C: New file.
* testsuite/libgomp.oacc-c-c++-common/gang-static-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-clauses.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/private-variables.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/routine-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/routine-4.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Likewise.
* testsuite/libgomp.oacc-fortran/clauses-1.f90: Likewise.
* testsuite/libgomp.oacc-fortran/default-1.f90: Likewise.
* testsuite/libgomp.oacc-fortran/firstprivate-1.f90: Likewise.
* testsuite/libgomp.oacc-fortran/gang-static-1.f90: Likewise.
* testsuite/libgomp.oacc-fortran/if-1.f90: Likewise.
* testsuite/libgomp.oacc-fortran/implicit-firstprivate-ref.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/pr68813.f90: Likewise.
* testsuite/libgomp.oacc-fortran/private-variables.f90: Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-1.c: Merge this
file...
* testsuite/libgomp.oacc-c-c++-common/parallel-1.c: ..., and this
file into...
* testsuite/libgomp.oacc-c-c++-common/data-clauses.h: ... this new
file. Update.
* testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels.c: New
file.
* testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-2.c: Rename to...
* testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c:
... this new file. Update.
* testsuite/libgomp.oacc-c-c++-common/parallel-2.c: Rename to...
* testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c:
... this new file. Update.
* testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: New
file. Incorporate...
* testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c: ... this
file, and...
* testsuite/libgomp.oacc-c-c++-common/worker-single-4.c: ... this
file, and...
* testsuite/libgomp.oacc-c-c++-common/worker-single-6.c: ... this
file.
* testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Remove file.
2016-03-29 Thomas Schwinge <thomas@codesourcery.com>
* testsuite/libgomp.oacc-c++/c++.exp [!lang_test_file_found]: Call

View File

@ -0,0 +1,98 @@
const int n = 100;
// Explicit data mapping in a template: both the accumulator and the
// array section ARRAY[0:n] are named in the 'copy' clause.  Returns the
// '+' reduction of the n elements.
template<typename T> T
sum (T array[])
{
  T total = 0;

#pragma acc parallel loop num_gangs (10) gang reduction (+:total) copy (total, array[0:n])
  for (int idx = 0; idx < n; idx++)
    total = total + array[idx];

  return total;
}
// Check implicit template copy map
template<typename T> T
sum ()
{
T s = 0;
T array[n];
for (int i = 0; i < n; i++)
array[i] = i+1;
#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s)
for (int i = 0; i < n; i++)
s += array[i];
return s;
}
// 'present' combined with 'async' in a template: ARRAY[0:n] is named in
// 'present' clauses on both regions.  The first region fills the array
// with 1..n, the second reduces it with '+'.
template<typename T> T
async_sum (T array[])
{
  T total = 0;

  // Fill region, launched with 'async (1)'.
#pragma acc parallel loop num_gangs (10) gang async (1) present (array[0:n])
  for (int idx = 0; idx < n; idx++)
    array[idx] = idx + 1;

  // Reduction region, launched with 'async' and 'wait (1)'.
#pragma acc parallel loop num_gangs (10) gang reduction (+:total) present (array[0:n]) copy (total) async wait (1)
  for (int idx = 0; idx < n; idx++)
    total = total + array[idx];

  // Stand-alone wait directive ahead of reading the accumulator.
#pragma acc wait
  return total;
}
// 'async' combined with an explicit 'firstprivate' in a template: the
// scalar C is named in 'firstprivate' and added to every loop index.
// Returns the '+' reduction of (i + C) for i in [0, n).
template<typename T> T
async_sum (int c)
{
  T total = 0;

#pragma acc parallel loop num_gangs (10) gang reduction (+:total) copy(total) firstprivate (c) async wait (1)
  for (int idx = 0; idx < n; idx++)
    total = total + (idx + c);

  // Stand-alone wait directive ahead of reading the accumulator.
#pragma acc wait
  return total;
}
// Driver: compute the expected sum 1 + 2 + ... + n on the host, then
// compare each template variant against it, aborting on any mismatch.
int
main()
{
int a[n];
int result = 0;
// Fill a with 1..n and accumulate the reference value alongside.
for (int i = 0; i < n; i++)
{
a[i] = i+1;
result += i+1;
}
if (sum (a) != result)
__builtin_abort ();
if (sum<int> () != result)
__builtin_abort ();
// The array-taking async_sum names its argument in 'present' clauses, so
// the array is entered with 'copyin' before the call and deleted afterwards.
#pragma acc enter data copyin (a)
if (async_sum (a) != result)
__builtin_abort ();
// With c == 1 the summed terms are i + 1, i.e. the same 1..n series.
if (async_sum<int> (1) != result)
__builtin_abort ();
#pragma acc exit data delete (a)
return 0;
}

View File

@ -1,4 +1,6 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* <http://news.gmane.org/find-root.php?message_id=%3C87pp0aaksc.fsf%40kepler.schwinge.homeip.net%3E>.
{ dg-xfail-run-if "TODO" { *-*-* } } */
/* { dg-additional-options "-lcuda" } */
#include <openacc.h>
@ -444,6 +446,438 @@ main (int argc, char **argv)
#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
#pragma acc wait (1)
}
for (i = 0; i < N; i++)
{
if (a[i] != 5.0)
abort ();
if (b[i] != 25.0)
abort ();
if (c[i] != 4.0)
abort ();
}
for (i = 0; i < N; i++)
{
a[i] = 3.0;
b[i] = 0.0;
}
#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
{
#pragma acc kernels async
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
}
#pragma acc wait
}
for (i = 0; i < N; i++)
{
if (a[i] != 3.0)
abort ();
if (b[i] != 3.0)
abort ();
}
for (i = 0; i < N; i++)
{
a[i] = 2.0;
b[i] = 0.0;
}
#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
{
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
}
#pragma acc wait (1)
}
for (i = 0; i < N; i++)
{
if (a[i] != 2.0)
abort ();
if (b[i] != 2.0)
abort ();
}
for (i = 0; i < N; i++)
{
a[i] = 3.0;
b[i] = 0.0;
c[i] = 0.0;
d[i] = 0.0;
}
#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
{
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
}
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
}
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
}
#pragma acc wait (1)
}
for (i = 0; i < N; i++)
{
if (a[i] != 3.0)
abort ();
if (b[i] != 9.0)
abort ();
if (c[i] != 4.0)
abort ();
if (d[i] != 1.0)
abort ();
}
for (i = 0; i < N; i++)
{
a[i] = 2.0;
b[i] = 0.0;
c[i] = 0.0;
d[i] = 0.0;
e[i] = 0.0;
}
#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
{
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
}
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
}
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
}
#pragma acc kernels wait (1) async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
}
#pragma acc wait (1)
}
for (i = 0; i < N; i++)
{
if (a[i] != 2.0)
abort ();
if (b[i] != 4.0)
abort ();
if (c[i] != 4.0)
abort ();
if (d[i] != 1.0)
abort ();
if (e[i] != 11.0)
abort ();
}
r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuStreamCreate failed: %d\n", r);
abort ();
}
acc_set_cuda_stream (1, stream1);
for (i = 0; i < N; i++)
{
a[i] = 5.0;
b[i] = 0.0;
}
#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
{
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
}
#pragma acc wait (1)
}
for (i = 0; i < N; i++)
{
if (a[i] != 5.0)
abort ();
if (b[i] != 5.0)
abort ();
}
for (i = 0; i < N; i++)
{
a[i] = 7.0;
b[i] = 0.0;
c[i] = 0.0;
d[i] = 0.0;
}
#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
{
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
}
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
}
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
}
#pragma acc wait (1)
}
for (i = 0; i < N; i++)
{
if (a[i] != 7.0)
abort ();
if (b[i] != 49.0)
abort ();
if (c[i] != 4.0)
abort ();
if (d[i] != 1.0)
abort ();
}
for (i = 0; i < N; i++)
{
a[i] = 3.0;
b[i] = 0.0;
c[i] = 0.0;
d[i] = 0.0;
e[i] = 0.0;
}
#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
{
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
}
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
}
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
}
#pragma acc kernels wait (1) async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
}
#pragma acc wait (1)
}
for (i = 0; i < N; i++)
{
if (a[i] != 3.0)
abort ();
if (b[i] != 9.0)
abort ();
if (c[i] != 4.0)
abort ();
if (d[i] != 1.0)
abort ();
if (e[i] != 17.0)
abort ();
}
for (i = 0; i < N; i++)
{
a[i] = 4.0;
b[i] = 0.0;
c[i] = 0.0;
d[i] = 0.0;
e[i] = 0.0;
}
#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
{
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
}
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
}
#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
}
for (i = 0; i < N; i++)
{
if (a[i] != 4.0)
abort ();
if (b[i] != 16.0)
abort ();
if (c[i] != 4.0)
abort ();
}
for (i = 0; i < N; i++)
{
a[i] = 5.0;
b[i] = 0.0;
c[i] = 0.0;
d[i] = 0.0;
e[i] = 0.0;
}
#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
{
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
}
#pragma acc kernels async (1)
{
int ii;
for (ii = 0; ii < N; ii++)
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
}
#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
#pragma acc wait (1)
}

View File

@ -578,6 +578,32 @@ main (int argc, char **argv)
abort ();
}
if (acc_is_present (&a[0], (N * sizeof (float))))
abort ();
if (acc_is_present (&b[0], (N * sizeof (float))))
abort ();
for (i = 0; i < N; i++)
{
a[i] = 6.0;
b[i] = 0.0;
}
#pragma acc parallel pcopy (a[0:N], b[0:N])
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
}
for (i = 0; i < N; i++)
{
if (b[i] != 6.0)
abort ();
}
if (acc_is_present (&a[0], (N * sizeof (float))))
abort ();

View File

@ -1,4 +1,4 @@
/* { dg-do run { target lto } } */
/* { dg-additional-options "-fipa-pta -flto -flto-partition=max" } */
#include "parallel-1.c"
#include "data-clauses-kernels.c"

View File

@ -0,0 +1,2 @@
#define CONSTRUCT kernels
#include "data-clauses.h"

View File

@ -1,4 +1,4 @@
/* { dg-do run { target lto } } */
/* { dg-additional-options "-fipa-pta -flto -flto-partition=max" } */
#include "kernels-1.c"
#include "data-clauses-parallel.c"

View File

@ -0,0 +1,2 @@
#define CONSTRUCT parallel
#include "data-clauses.h"

View File

@ -1,7 +1,3 @@
/* { dg-do run } */
#include <stdlib.h>
int i;
int main(void)
@ -11,145 +7,145 @@ int main(void)
i = -1;
j = -2;
v = 0;
#pragma acc parallel /* copyout */ present_or_copyout (v) copyin (i, j)
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) copyin (i, j)
{
if (i != -1 || j != -2)
abort ();
__builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
__builtin_abort ();
v = 1;
}
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
abort ();
__builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
abort ();
__builtin_abort ();
#endif
i = -1;
j = -2;
v = 0;
#pragma acc parallel /* copyout */ present_or_copyout (v) copyout (i, j)
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) copyout (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
__builtin_abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
abort ();
__builtin_abort ();
i = -1;
j = -2;
v = 0;
#pragma acc parallel /* copyout */ present_or_copyout (v) copy (i, j)
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) copy (i, j)
{
if (i != -1 || j != -2)
abort ();
__builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
__builtin_abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
abort ();
__builtin_abort ();
i = -1;
j = -2;
v = 0;
#pragma acc parallel /* copyout */ present_or_copyout (v) create (i, j)
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) create (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
__builtin_abort ();
v = 1;
}
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
abort ();
__builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
abort ();
__builtin_abort ();
#endif
i = -1;
j = -2;
v = 0;
#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
{
if (i != -1 || j != -2)
abort ();
__builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
__builtin_abort ();
v = 1;
}
if (v != 1)
abort ();
__builtin_abort ();
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
abort ();
__builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
abort ();
__builtin_abort ();
#endif
i = -1;
j = -2;
v = 0;
#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
__builtin_abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
abort ();
__builtin_abort ();
i = -1;
j = -2;
v = 0;
#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copy (i, j)
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_copy (i, j)
{
if (i != -1 || j != -2)
abort ();
__builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
__builtin_abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
abort ();
__builtin_abort ();
i = -1;
j = -2;
v = 0;
#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_create (i, j)
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_create (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
__builtin_abort ();
v = 1;
}
if (v != 1)
abort ();
__builtin_abort ();
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
abort ();
__builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
abort ();
__builtin_abort ();
#endif
i = -1;
@ -158,23 +154,23 @@ int main(void)
#pragma acc data copyin (i, j)
{
#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j)
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present (i, j)
{
if (i != -1 || j != -2)
abort ();
__builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
__builtin_abort ();
v = 1;
}
}
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
abort ();
__builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
abort ();
__builtin_abort ();
#endif
i = -1;
@ -183,23 +179,23 @@ int main(void)
#pragma acc data copyin(i, j)
{
#pragma acc parallel /* copyout */ present_or_copyout (v)
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v)
{
if (i != -1 || j != -2)
abort ();
__builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
__builtin_abort ();
v = 1;
}
}
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
abort ();
__builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
abort ();
__builtin_abort ();
#endif
return 0;

View File

@ -1,5 +1,3 @@
/* { dg-do run } */
#include <stdlib.h>
int main (void)
@ -16,6 +14,27 @@ int main (void)
a_2 = &a;
}
if (a != A)
abort ();
if (a_1 != a)
abort ();
#if ACC_MEM_SHARED
if (a_2 != &a)
abort ();
#else
if (a_2 == &a)
abort ();
#endif
a_1 = a_2 = 0;
#pragma acc data deviceptr (a)
#pragma acc parallel copyout (a_1, a_2)
{
a_1 = a;
a_2 = &a;
}
if (a != A)
abort ();
if (a_1 != a)

View File

@ -1,8 +1,7 @@
/* { dg-do run } */
#include <openacc.h>
int main ()
void t1 ()
{
int ok = 1;
int val = 2;
@ -28,14 +27,115 @@ int main ()
if (ondev)
{
if (!ok)
return 1;
__builtin_abort ();
if (val != 2)
return 1;
__builtin_abort ();
for (int i = 0; i < 32; i++)
if (ary[i] != 2 + i)
return 1;
__builtin_abort ();
}
}
/* Check a firstprivate copy of a variable that is already mapped by an
enclosing data region: the second compute region records in OK whether its
private VAL equals 7, and the host afterwards expects VAL == 7, i.e. the
write of 9 to the private copy must not be visible. */
void t2 ()
{
int ok = 1;
int val = 2;
#pragma acc data copy(val)
{
/* Write 7 through the mapping established by the data region. */
#pragma acc parallel present (val)
{
val = 7;
}
/* OK is copied back; VAL here is the firstprivate copy. */
#pragma acc parallel firstprivate (val) copy(ok)
{
ok = val == 7;
val = 9;
}
}
if (!ok)
__builtin_abort ();
if (val != 7)
__builtin_abort ();
}
/* Array size (and gang count when running on an nvidia device) for t3. */
#define N 100
/* Check firstprivate on a gang loop and inside a data region that maps the
same variable. */
void t3 ()
{
int a, b[N], c, d, i;
/* Use N gangs/iterations on an nvidia device, otherwise a single one. */
int n = acc_get_device_type () == acc_device_nvidia ? N : 1;
a = 5;
for (i = 0; i < n; i++)
b[i] = -1;
#pragma acc parallel num_gangs (n) firstprivate (a)
#pragma acc loop gang
for (i = 0; i < n; i++)
{
a = a + i;
b[i] = a;
}
/* The host copy of A is compared unchanged: each B[i] must be A + i. */
for (i = 0; i < n; i++)
if (a + i != b[i])
__builtin_abort ();
#pragma acc data copy (a)
{
/* Modify only the firstprivate copy and report it through C. */
#pragma acc parallel firstprivate (a) copyout (c)
{
a = 10;
c = a;
}
/* This version of 'a' should still be 5. */
#pragma acc parallel copyout (d) present (a)
{
d = a;
}
}
if (c != 10)
__builtin_abort ();
if (d != 5)
__builtin_abort ();
}
#undef N
/* Check that a firstprivate scalar (X == 5) can be read inside a gang loop:
every element of ARR starts at 3 and must end up as 8. */
void t4 ()
{
int x = 5, i, arr[32];
for (i = 0; i < 32; i++)
arr[i] = 3;
#pragma acc parallel firstprivate(x) copy(arr) num_gangs(32) num_workers(8) vector_length(32)
{
#pragma acc loop gang
for (i = 0; i < 32; i++)
arr[i] += x;
}
for (i = 0; i < 32; i++)
if (arr[i] != 8)
__builtin_abort ();
}
/* Run the four firstprivate tests in order; each one aborts on failure, so
reaching the return statement means all of them passed. */
int
main()
{
t1 ();
t2 ();
t3 ();
t4 ();
return 0;
}

View File

@ -1,31 +0,0 @@
/* { dg-do run } */
#include <openacc.h>
/* Stand-alone firstprivate test: returns 1 if the firstprivate copy did not
see the value 7 or if the host value of VAL is not 7 afterwards, else 0. */
int main ()
{
int ok = 1;
int val = 2;
#pragma acc data copy(val)
{
/* Write 7 through the mapping established by the data region. */
#pragma acc parallel present (val)
{
val = 7;
}
/* VAL is a firstprivate copy here; the write of 9 is checked on the host
not to show up. */
#pragma acc parallel firstprivate (val) copy(ok)
{
ok = val == 7;
val = 9;
}
}
if (!ok)
return 1;
if(val != 7)
return 1;
return 0;
}

View File

@ -0,0 +1,48 @@
#include <assert.h>
#define N 100

/* Walk the first N elements of A and B and assert that each element of A
   equals the matching element of B plus SARG.  */
void
test (int *a, int *b, int sarg)
{
  int i = 0;

  while (i < N)
    {
      assert (a[i] == b[i] + sarg);
      ++i;
    }
}
/* Fill A from B under several gang (static:...) schedules with 10 gangs and
verify after each region that A[i] == B[i] + offset, using a different offset
per region so a stale result from the previous region would be detected. */
int
main ()
{
int a[N], b[N];
int i;
for (i = 0; i < N; i++)
b[i] = i+1;
/* static:* */
#pragma acc parallel loop gang (static:*) num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = b[i] + 0;
test (a, b, 0);
/* static chunk size 1 */
#pragma acc parallel loop gang (static:1) num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = b[i] + 1;
test (a, b, 1);
/* static chunk size 5 */
#pragma acc parallel loop gang (static:5) num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = b[i] + 5;
test (a, b, 5);
/* static chunk size 20 */
#pragma acc parallel loop gang (static:20) num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = b[i] + 20;
test (a, b, 20);
return 0;
}

View File

@ -0,0 +1,100 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* This code uses nvptx inline assembly guarded with acc_on_device, which is
not optimized away at -O0, and then confuses the target assembler.
{ dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
#include <assert.h>
#include <openacc.h>
/* Number of loop iterations / array elements used by every test below.  */
#define N 100

/* Evaluate to the id of the gang executing the current iteration: on an
   nvidia device this reads the %ctaid.x register through inline assembly,
   anywhere else it falls back to the supplied value I.  */
#define GANG_ID(I)						\
  (acc_on_device (acc_device_nvidia)				\
   ? ({unsigned __r;						\
       __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (__r));	\
       __r; }) : (I))

/* Verify the gang assignment recorded in A for a gang (static:SARG) loop:
   consecutive chunks of SARG iterations must belong to gangs
   0, 1, ..., NUM_GANGS - 1, 0, 1, ...  A SARG of 0 is treated as a chunk
   size of 1.  Any mismatch trips an assertion.

   Returns 0 on success.  (The function is declared to return int, so it
   must not fall off its end without a value.)  */
int
test_static(int *a, int num_gangs, int sarg)
{
  int i, j;

  if (sarg == 0)
    sarg = 1;

  for (i = 0; i < N / sarg; i++)
    for (j = 0; j < sarg; j++)
      assert (a[i*sarg+j] == i % num_gangs);

  return 0;
}

/* Verify the gang assignment recorded in A when the iterations are split
   into contiguous blocks of GANGS iterations each: block number i / GANGS
   must have been executed by the gang with that same number.  Any mismatch
   trips an assertion.

   Returns 0 on success.  */
int
test_nonstatic(int *a, int gangs)
{
  int i, j;

  for (i = 0; i < N; i+=gangs)
    for (j = 0; j < gangs; j++)
      assert (a[i+j] == i/gangs);

  return 0;
}
/* Record the executing gang id for every iteration under different gang
scheduling clauses (always 10 gangs) and check the resulting distribution
with test_static / test_nonstatic. */
int
main ()
{
int a[N];
int i, x;
/* static:* is checked with the same helper as the plain gang clause. */
#pragma acc parallel loop gang (static:*) num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = GANG_ID (i);
test_nonstatic (a, 10);
#pragma acc parallel loop gang (static:1) num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = GANG_ID (i);
test_static (a, 10, 1);
#pragma acc parallel loop gang (static:2) num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = GANG_ID (i);
test_static (a, 10, 2);
#pragma acc parallel loop gang (static:5) num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = GANG_ID (i);
test_static (a, 10, 5);
#pragma acc parallel loop gang (static:20) num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = GANG_ID (i);
test_static (a, 10, 20);
/* Non-static gang. */
#pragma acc parallel loop gang num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = GANG_ID (i);
test_nonstatic (a, 10);
/* Static arguments with a variable expression. */
x = 20;
#pragma acc parallel loop gang (static:0+x) num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = GANG_ID (i);
test_static (a, 10, 20);
/* Same chunk size again, this time given as a plain variable. */
x = 20;
#pragma acc parallel loop gang (static:x) num_gangs (10)
for (i = 0; i < 100; i++)
a[i] = GANG_ID (i);
test_static (a, 10, 20);
return 0;
}

View File

@ -1,5 +1,3 @@
/* { dg-do run } */
#include <openacc.h>
#include <stdlib.h>
#include <stdbool.h>
@ -608,5 +606,357 @@ main(int argc, char **argv)
abort ();
#endif
for (i = 0; i < N; i++)
a[i] = 4.0;
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(1)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
#if ACC_MEM_SHARED
exp = 5.0;
#else
exp = 4.0;
#endif
for (i = 0; i < N; i++)
{
if (b[i] != exp)
abort();
}
for (i = 0; i < N; i++)
a[i] = 16.0;
#pragma acc kernels if(0)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
for (i = 0; i < N; i++)
{
if (b[i] != 17.0)
abort();
}
for (i = 0; i < N; i++)
a[i] = 8.0;
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
#if ACC_MEM_SHARED
exp = 9.0;
#else
exp = 8.0;
#endif
for (i = 0; i < N; i++)
{
if (b[i] != exp)
abort();
}
for (i = 0; i < N; i++)
a[i] = 22.0;
#pragma acc kernels if(zero)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
for (i = 0; i < N; i++)
{
if (b[i] != 23.0)
abort();
}
for (i = 0; i < N; i++)
a[i] = 16.0;
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(true)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
#if ACC_MEM_SHARED
exp = 17.0;
#else
exp = 16.0;
#endif
for (i = 0; i < N; i++)
{
if (b[i] != exp)
abort();
}
for (i = 0; i < N; i++)
a[i] = 76.0;
#pragma acc kernels if(false)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
for (i = 0; i < N; i++)
{
if (b[i] != 77.0)
abort();
}
for (i = 0; i < N; i++)
a[i] = 22.0;
n = 1;
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
#if ACC_MEM_SHARED
exp = 23.0;
#else
exp = 22.0;
#endif
for (i = 0; i < N; i++)
{
if (b[i] != exp)
abort();
}
for (i = 0; i < N; i++)
a[i] = 18.0;
n = 0;
#pragma acc kernels if(n)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
for (i = 0; i < N; i++)
{
if (b[i] != 19.0)
abort();
}
for (i = 0; i < N; i++)
a[i] = 49.0;
n = 1;
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n + n)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
#if ACC_MEM_SHARED
exp = 50.0;
#else
exp = 49.0;
#endif
for (i = 0; i < N; i++)
{
if (b[i] != exp)
abort();
}
for (i = 0; i < N; i++)
a[i] = 38.0;
n = 0;
#pragma acc kernels if(n + n)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
for (i = 0; i < N; i++)
{
if (b[i] != 39.0)
abort();
}
for (i = 0; i < N; i++)
a[i] = 91.0;
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(-2)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
#if ACC_MEM_SHARED
exp = 92.0;
#else
exp = 91.0;
#endif
for (i = 0; i < N; i++)
{
if (b[i] != exp)
abort();
}
for (i = 0; i < N; i++)
a[i] = 43.0;
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one == 1)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
#if ACC_MEM_SHARED
exp = 44.0;
#else
exp = 43.0;
#endif
for (i = 0; i < N; i++)
{
if (b[i] != exp)
abort();
}
for (i = 0; i < N; i++)
a[i] = 87.0;
#pragma acc kernels if(one == 0)
{
int ii;
for (ii = 0; ii < N; ii++)
{
if (acc_on_device (acc_device_host))
b[ii] = a[ii] + 1;
else
b[ii] = a[ii];
}
}
for (i = 0; i < N; i++)
{
if (b[i] != 88.0)
abort();
}
for (i = 0; i < N; i++)
{
a[i] = 3.0;
b[i] = 9.0;
}
#if ACC_MEM_SHARED
exp = 0.0;
exp2 = 0.0;
#else
acc_map_data (a, d_a, N * sizeof (float));
acc_map_data (b, d_b, N * sizeof (float));
exp = 3.0;
exp2 = 9.0;
#endif
return 0;
}

View File

@ -1,184 +0,0 @@
/* { dg-do run } */
#include <stdlib.h>
int i;
/* Data clause test for the kernels construct. Each section resets the global
I and the locals J and V, runs a kernels region with one data clause applied
to (i, j), and then checks the host values. Only the present_or_* sections
are compiled; the sections inside "#if 0" are disabled. */
int main (void)
{
int j, v;
/* Disabled: copyin, copyout, copy and create. */
#if 0
i = -1;
j = -2;
v = 0;
#pragma acc kernels /* copyout */ present_or_copyout (v) copyin (i, j)
{
if (i != -1 || j != -2)
abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
v = 1;
}
if (v != 1 || i != -1 || j != -2)
abort ();
i = -1;
j = -2;
v = 0;
#pragma acc kernels /* copyout */ present_or_copyout (v) copyout (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
abort ();
i = -1;
j = -2;
v = 0;
#pragma acc kernels /* copyout */ present_or_copyout (v) copy (i, j)
{
if (i != -1 || j != -2)
abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
abort ();
i = -1;
j = -2;
v = 0;
#pragma acc kernels /* copyout */ present_or_copyout (v) create (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
v = 1;
}
if (v != 1 || i != -1 || j != -2)
abort ();
#endif
/* present_or_copyin: the host values depend on ACC_MEM_SHARED. */
i = -1;
j = -2;
v = 0;
#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
{
if (i != -1 || j != -2)
abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
v = 1;
}
if (v != 1)
abort ();
#if ACC_MEM_SHARED
if (i != 2 || j != 1)
abort ();
#else
if (i != -1 || j != -2)
abort ();
#endif
/* present_or_copyout: the values written in the region must be visible. */
i = -1;
j = -2;
v = 0;
#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
abort ();
/* present_or_copy: values go in and come back out. */
i = -1;
j = -2;
v = 0;
#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copy (i, j)
{
if (i != -1 || j != -2)
abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
abort ();
/* present_or_create: the host values depend on ACC_MEM_SHARED. */
i = -1;
j = -2;
v = 0;
#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_create (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
v = 1;
}
if (v != 1)
abort ();
#if ACC_MEM_SHARED
if (i != 2 || j != 1)
abort ();
#else
if (i != -1 || j != -2)
abort ();
#endif
/* Disabled: present clause. */
#if 0
i = -1;
j = -2;
v = 0;
#pragma acc kernels /* copyout */ present_or_copyout (v) present (i, j)
{
if (i != -1 || j != -2)
abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
abort ();
#endif
/* Disabled: no explicit clause for i and j. */
#if 0
i = -1;
j = -2;
v = 0;
#pragma acc kernels /* copyout */ present_or_copyout (v)
{
if (i != -1 || j != -2)
abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
abort ();
#endif
return 0;
}

View File

@ -0,0 +1,62 @@
/* Exercise the auto, independent and seq loop clauses inside
kernels regions. */
#include <assert.h>
#define N 100

/* Assert that the first N elements of A and B are pairwise equal.  */
void
check (int *a, int *b)
{
  int i = 0;

  while (i < N)
    {
      assert (a[i] == b[i]);
      ++i;
    }
}
/* Fill A inside kernels regions using the auto, independent and seq loop
clauses combined with different data clauses, and compare the result with
the host-initialised reference array B after every region. */
int
main ()
{
int i, a[N], b[N];
#pragma acc kernels copy(a)
{
#pragma acc loop auto
for (i = 0; i < N; i++)
a[i] = i;
}
/* Build the reference values on the host. */
for (i = 0; i < N; i++)
b[i] = i;
check (a, b);
#pragma acc kernels copyout(a)
{
#pragma acc loop independent
for (i = 0; i < N; i++)
a[i] = i;
}
check (a, b);
#pragma acc kernels present_or_copy(a)
{
#pragma acc loop seq
for (i = 0; i < N; i++)
a[i] = i;
}
check (a, b);
/* This region reads B on the device instead of recomputing the value. */
#pragma acc kernels pcopyout(a) present_or_copyin(b)
{
#pragma acc loop seq
for (i = 0; i < N; i++)
a[i] = b[i];
}
check (a, b);
return 0;
}

View File

@ -0,0 +1,895 @@
/* Miscellaneous test cases for gang/worker/vector mode transitions. */
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <math.h>
#include <openacc.h>
/* Test basic vector-partitioned mode transitions: a scalar is updated before
and after a vector loop inside one parallel region. */
void t1()
{
int n = 0, arr[32], i;
for (i = 0; i < 32; i++)
arr[i] = 0;
#pragma acc parallel copy(n, arr) \
num_gangs(1) num_workers(1) vector_length(32)
{
int j;
/* First update of n, before the vector loop. */
n++;
#pragma acc loop vector
for (j = 0; j < 32; j++)
arr[j]++;
/* Second update of n, after the vector loop. */
n++;
}
/* n was incremented twice, every array element once. */
assert (n == 2);
for (i = 0; i < 32; i++)
assert (arr[i] == 1);
}
/* Test vector-partitioned, gang-partitioned mode: a gang loop with a nested
vector loop, surrounded by two gang(static:*) loops that each bump n[j]. */
void t2()
{
int n[32], arr[1024], i;
for (i = 0; i < 1024; i++)
arr[i] = 0;
for (i = 0; i < 32; i++)
n[i] = 0;
#pragma acc parallel copy(n, arr) \
num_gangs(32) num_workers(1) vector_length(32)
{
int j, k;
#pragma acc loop gang(static:*)
for (j = 0; j < 32; j++)
n[j]++;
/* 32 x 32 iterations cover all 1024 elements of arr exactly once. */
#pragma acc loop gang
for (j = 0; j < 32; j++)
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[j * 32 + k]++;
#pragma acc loop gang(static:*)
for (j = 0; j < 32; j++)
n[j]++;
}
for (i = 0; i < 32; i++)
assert (n[i] == 2);
for (i = 0; i < 1024; i++)
assert (arr[i] == 1);
}
/* Test conditions inside vector-partitioned loops: odd elements of arr are
doubled, even elements are left alone. */
void t4()
{
int n[32], arr[1024], i;
for (i = 0; i < 1024; i++)
arr[i] = i;
for (i = 0; i < 32; i++)
n[i] = 0;
#pragma acc parallel copy(n, arr) \
num_gangs(32) num_workers(1) vector_length(32)
{
int j, k;
#pragma acc loop gang(static:*)
for (j = 0; j < 32; j++)
n[j]++;
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
#pragma acc loop vector
for (k = 0; k < 32; k++)
if ((arr[j * 32 + k] % 2) != 0)
arr[j * 32 + k] *= 2;
}
#pragma acc loop gang(static:*)
for (j = 0; j < 32; j++)
n[j]++;
}
for (i = 0; i < 32; i++)
assert (n[i] == 2);
/* arr[i] started as i, so only odd indices hold a doubled value. */
for (i = 0; i < 1024; i++)
assert (arr[i] == ((i % 2) == 0 ? i : i * 2));
}
/* Test conditions inside gang-partitioned/vector-partitioned loops: same
computation as t4, but with a single combined "gang vector" loop over all
1024 elements. */
void t5()
{
int n[32], arr[1024], i;
for (i = 0; i < 1024; i++)
arr[i] = i;
for (i = 0; i < 32; i++)
n[i] = 0;
#pragma acc parallel copy(n, arr) \
num_gangs(32) num_workers(1) vector_length(32)
{
int j;
#pragma acc loop gang(static:*)
for (j = 0; j < 32; j++)
n[j]++;
#pragma acc loop gang vector
for (j = 0; j < 1024; j++)
if ((arr[j] % 2) != 0)
arr[j] *= 2;
#pragma acc loop gang(static:*)
for (j = 0; j < 32; j++)
n[j]++;
}
for (i = 0; i < 32; i++)
assert (n[i] == 2);
for (i = 0; i < 1024; i++)
assert (arr[i] == ((i % 2) == 0 ? i : i * 2));
}
/* Test trivial operation of vector-single mode: one increment of a copied
scalar with no loop directive at all; the host expects exactly 1. */
void t7()
{
int n = 0;
#pragma acc parallel copy(n) \
num_gangs(1) num_workers(1) vector_length(32)
{
n++;
}
assert (n == 1);
}
/* Test vector-single, gang-partitioned mode. The region is run once for
every power-of-two gang count from 1 to 1024; each run must increment every
element exactly once. */
void t8()
{
int arr[1024];
int gangs;
for (gangs = 1; gangs <= 1024; gangs <<= 1)
{
int i;
/* Reset the array before each gang count. */
for (i = 0; i < 1024; i++)
arr[i] = 0;
#pragma acc parallel copy(arr) \
num_gangs(gangs) num_workers(1) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 1024; j++)
arr[j]++;
}
for (i = 0; i < 1024; i++)
assert (arr[i] == 1);
}
}
/* Test conditions in vector-single mode.  The region is run once for every
   power-of-two gang count from 1 to 1024.  Elements whose index is a
   multiple of 3 are incremented by 1, all others by 2.  */
void t9()
{
  int arr[1024];
  int gangs;

  for (gangs = 1; gangs <= 1024; gangs <<= 1)
    {
      int i;

      /* Reset the array before each gang count.  */
      for (i = 0; i < 1024; i++)
        arr[i] = 0;

      #pragma acc parallel copy(arr) \
                  num_gangs(gangs) num_workers(1) vector_length(32)
      {
        int j;
        #pragma acc loop gang
        for (j = 0; j < 1024; j++)
          if ((j % 3) == 0)
            arr[j]++;
          else
            arr[j] += 2;
      }

      /* The conditional expression must be parenthesised as a whole:
         "==" binds tighter than "?:", so without the extra parentheses the
         assertion would evaluate to 1 or 2 and could never fail.  */
      for (i = 0; i < 1024; i++)
        assert (arr[i] == (((i % 3) == 0) ? 1 : 2));
    }
}
/* Test switch in vector-single mode. The region is run once for every
power-of-two gang count from 1 to 1024; element j receives (j % 5) + 1. */
void t10()
{
int arr[1024];
int gangs;
for (gangs = 1; gangs <= 1024; gangs <<= 1)
{
int i;
for (i = 0; i < 1024; i++)
arr[i] = 0;
#pragma acc parallel copy(arr) \
num_gangs(gangs) num_workers(1) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 1024; j++)
switch (j % 5)
{
case 0: arr[j] += 1; break;
case 1: arr[j] += 2; break;
case 2: arr[j] += 3; break;
case 3: arr[j] += 4; break;
case 4: arr[j] += 5; break;
/* j is never negative here, so j % 5 is always 0..4; a 99 in the result
would therefore show up as a failed assertion below. */
default: arr[j] += 99;
}
}
for (i = 0; i < 1024; i++)
assert (arr[i] == (i % 5) + 1);
}
}
/* Test switch in vector-single mode, initialise array on device. The host
fills arr with 99 first, so the expected (i % 5) + 1 can only be reached if
the device-side zeroing loop ran before the switch loop for each element. */
void t11()
{
int arr[1024];
int i;
for (i = 0; i < 1024; i++)
arr[i] = 99;
#pragma acc parallel copy(arr) \
num_gangs(1024) num_workers(1) vector_length(32)
{
int j;
/* This loop and the one following must be distributed to available gangs
in the same way to ensure data dependencies are not violated (hence the
"static" clauses). */
#pragma acc loop gang(static:*)
for (j = 0; j < 1024; j++)
arr[j] = 0;
#pragma acc loop gang(static:*)
for (j = 0; j < 1024; j++)
switch (j % 5)
{
case 0: arr[j] += 1; break;
case 1: arr[j] += 2; break;
case 2: arr[j] += 3; break;
case 3: arr[j] += 4; break;
case 4: arr[j] += 5; break;
default: arr[j] += 99;
}
}
for (i = 0; i < 1024; i++)
assert (arr[i] == (i % 5) + 1);
}
/* Test multiple conditions in vector-single mode. Classifies each index as
"fizzbuzz" (multiple of 3 and 5), "fizz" (multiple of 3 only) or "buzz"
(multiple of 5 only); at most one flag is set per index. */
#define NUM_GANGS 4096
void t12()
{
bool fizz[NUM_GANGS], buzz[NUM_GANGS], fizzbuzz[NUM_GANGS];
int i;
#pragma acc parallel copyout(fizz, buzz, fizzbuzz) \
num_gangs(NUM_GANGS) num_workers(1) vector_length(32)
{
int j;
/* This loop and the one following must be distributed to available gangs
in the same way to ensure data dependencies are not violated (hence the
"static" clauses). */
#pragma acc loop gang(static:*)
for (j = 0; j < NUM_GANGS; j++)
fizz[j] = buzz[j] = fizzbuzz[j] = 0;
#pragma acc loop gang(static:*)
for (j = 0; j < NUM_GANGS; j++)
{
if ((j % 3) == 0 && (j % 5) == 0)
fizzbuzz[j] = 1;
else
{
if ((j % 3) == 0)
fizz[j] = 1;
else if ((j % 5) == 0)
buzz[j] = 1;
}
}
}
/* Recompute the three predicates on the host and compare. */
for (i = 0; i < NUM_GANGS; i++)
{
assert (fizzbuzz[i] == ((i % 3) == 0 && (i % 5) == 0));
assert (fizz[i] == ((i % 3) == 0 && (i % 5) != 0));
assert (buzz[i] == ((i % 3) != 0 && (i % 5) == 0));
}
}
#undef NUM_GANGS
/* Test worker-partitioned/vector-single mode: a gang loop of 32 iterations
with a nested worker loop of 8 iterations adds each element's own index to
a zero-initialised array of 32 * 8 elements. */
void t13()
{
int arr[32 * 8], i;
for (i = 0; i < 32 * 8; i++)
arr[i] = 0;
#pragma acc parallel copy(arr) \
num_gangs(8) num_workers(8) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
int k;
#pragma acc loop worker
for (k = 0; k < 8; k++)
arr[j * 8 + k] += j * 8 + k;
}
}
for (i = 0; i < 32 * 8; i++)
assert (arr[i] == i);
}
/* Test worker-single/worker-partitioned transitions: inside each gang
iteration, four increments of n[j] alternate with three worker loops that
each increment the 32 elements of arr belonging to j. */
void t16()
{
int n[32], arr[32 * 32], i;
for (i = 0; i < 32 * 32; i++)
arr[i] = 0;
for (i = 0; i < 32; i++)
n[i] = 0;
#pragma acc parallel copy(n, arr) \
num_gangs(8) num_workers(16) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
int k;
n[j]++;
#pragma acc loop worker
for (k = 0; k < 32; k++)
arr[j * 32 + k]++;
n[j]++;
#pragma acc loop worker
for (k = 0; k < 32; k++)
arr[j * 32 + k]++;
n[j]++;
#pragma acc loop worker
for (k = 0; k < 32; k++)
arr[j * 32 + k]++;
n[j]++;
}
}
/* Four increments per n[j], three per arr element. */
for (i = 0; i < 32; i++)
assert (n[i] == 4);
for (i = 0; i < 32 * 32; i++)
assert (arr[i] == 3);
}
/* Test correct synchronisation between worker-partitioned loops. Runs for
every power-of-two combination of 1..32 workers and 1..32 gangs. Each of the
three worker loops reads the array written by the previous loop at mirrored
positions within the same 32-element row and multiplies by 2. */
void t17()
{
int arr_a[32 * 32], arr_b[32 * 32], i;
int num_workers, num_gangs;
for (num_workers = 1; num_workers <= 32; num_workers <<= 1)
for (num_gangs = 1; num_gangs <= 32; num_gangs <<= 1)
{
for (i = 0; i < 32 * 32; i++)
arr_a[i] = i;
#pragma acc parallel copyin(arr_a) copyout(arr_b) \
num_gangs(num_gangs) num_workers(num_workers) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
int k;
#pragma acc loop worker
for (k = 0; k < 32; k++)
arr_b[j * 32 + (31 - k)] = arr_a[j * 32 + k] * 2;
#pragma acc loop worker
for (k = 0; k < 32; k++)
arr_a[j * 32 + (31 - k)] = arr_b[j * 32 + k] * 2;
#pragma acc loop worker
for (k = 0; k < 32; k++)
arr_b[j * 32 + (31 - k)] = arr_a[j * 32 + k] * 2;
}
}
/* Three doublings give a factor of 8; an odd number of mirrorings within
a 32-element row maps index i to i ^ 31. */
for (i = 0; i < 32 * 32; i++)
assert (arr_b[i] == (i ^ 31) * 8);
}
}
/* Test correct synchronisation between worker+vector-partitioned loops.
Same scheme as t17, but each inner loop is "worker vector" and covers a
1024-element block per gang iteration. */
void t18()
{
int arr_a[32 * 32 * 32], arr_b[32 * 32 * 32], i;
int num_workers, num_gangs;
for (num_workers = 1; num_workers <= 32; num_workers <<= 1)
for (num_gangs = 1; num_gangs <= 32; num_gangs <<= 1)
{
for (i = 0; i < 32 * 32 * 32; i++)
arr_a[i] = i;
#pragma acc parallel copyin(arr_a) copyout(arr_b) \
num_gangs(num_gangs) num_workers(num_workers) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
int k;
#pragma acc loop worker vector
for (k = 0; k < 32 * 32; k++)
arr_b[j * 32 * 32 + (1023 - k)] = arr_a[j * 32 * 32 + k] * 2;
#pragma acc loop worker vector
for (k = 0; k < 32 * 32; k++)
arr_a[j * 32 * 32 + (1023 - k)] = arr_b[j * 32 * 32 + k] * 2;
#pragma acc loop worker vector
for (k = 0; k < 32 * 32; k++)
arr_b[j * 32 * 32 + (1023 - k)] = arr_a[j * 32 * 32 + k] * 2;
}
}
/* Three doublings give a factor of 8; an odd number of mirrorings within
a 1024-element block maps index i to i ^ 1023. */
for (i = 0; i < 32 * 32 * 32; i++)
assert (arr_b[i] == (i ^ 1023) * 8);
}
}
/* Test correct synchronisation between vector-partitioned loops in
worker-partitioned mode. Runs for every power-of-two combination of 1..32
workers and 1..32 gangs. Three vector loops pass data back and forth between
arr_a and arr_b at mirrored positions within each 32-element row, applying
index-dependent factors; n counts the two scalar updates made per
(gang, worker) iteration. */
void t19()
{
int n[32 * 32], arr_a[32 * 32 * 32], arr_b[32 * 32 * 32], i;
int num_workers, num_gangs;
for (num_workers = 1; num_workers <= 32; num_workers <<= 1)
for (num_gangs = 1; num_gangs <= 32; num_gangs <<= 1)
{
for (i = 0; i < 32 * 32 * 32; i++)
arr_a[i] = i;
for (i = 0; i < 32 * 32; i++)
n[i] = 0;
#pragma acc parallel copy (n) copyin(arr_a) copyout(arr_b) \
num_gangs(num_gangs) num_workers(num_workers) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
int k;
#pragma acc loop worker
for (k = 0; k < 32; k++)
{
int m;
n[j * 32 + k]++;
/* Factor 2 for even source indices, 3 for odd ones. */
#pragma acc loop vector
for (m = 0; m < 32; m++)
{
if (((j * 1024 + k * 32 + m) % 2) == 0)
arr_b[j * 1024 + k * 32 + (31 - m)]
= arr_a[j * 1024 + k * 32 + m] * 2;
else
arr_b[j * 1024 + k * 32 + (31 - m)]
= arr_a[j * 1024 + k * 32 + m] * 3;
}
/* Test returning to vector-single mode... */
n[j * 32 + k]++;
/* Factor 5 for source indices divisible by 3, otherwise 7. */
#pragma acc loop vector
for (m = 0; m < 32; m++)
{
if (((j * 1024 + k * 32 + m) % 3) == 0)
arr_a[j * 1024 + k * 32 + (31 - m)]
= arr_b[j * 1024 + k * 32 + m] * 5;
else
arr_a[j * 1024 + k * 32 + (31 - m)]
= arr_b[j * 1024 + k * 32 + m] * 7;
}
/* ...and back-to-back vector loops. */
#pragma acc loop vector
for (m = 0; m < 32; m++)
{
if (((j * 1024 + k * 32 + m) % 2) == 0)
arr_b[j * 1024 + k * 32 + (31 - m)]
= arr_a[j * 1024 + k * 32 + m] * 3;
else
arr_b[j * 1024 + k * 32 + (31 - m)]
= arr_a[j * 1024 + k * 32 + m] * 2;
}
}
}
}
for (i = 0; i < 32 * 32; i++)
assert (n[i] == 2);
/* The first and third loops together always contribute 2 * 3 == 6; the
second contributes 5 or 7 depending on whether i is divisible by 3. */
for (i = 0; i < 32 * 32 * 32; i++)
{
int m = 6 * ((i % 3) == 0 ? 5 : 7);
assert (arr_b[i] == (i ^ 31) * m);
}
}
}
/* With -O0, variables are on the stack, not in registers. Check that worker
state propagation handles the stack frame. Each of the eight worker-loop
iterations stores the region-local value INTERNAL (100) into its own scalar
w0..w7; the host then expects all eight to be 100. */
void t20()
{
int w0 = 0;
int w1 = 0;
int w2 = 0;
int w3 = 0;
int w4 = 0;
int w5 = 0;
int w6 = 0;
int w7 = 0;
int i;
#pragma acc parallel copy (w0, w1, w2, w3, w4, w5, w6, w7) \
num_gangs (1) num_workers (8)
{
/* Declared outside the worker loop and read inside it. */
int internal = 100;
#pragma acc loop worker
for (i = 0; i < 8; i++)
{
switch (i)
{
case 0: w0 = internal; break;
case 1: w1 = internal; break;
case 2: w2 = internal; break;
case 3: w3 = internal; break;
case 4: w4 = internal; break;
case 5: w5 = internal; break;
case 6: w6 = internal; break;
case 7: w7 = internal; break;
default: break;
}
}
}
if (w0 != 100
|| w1 != 100
|| w2 != 100
|| w3 != 100
|| w4 != 100
|| w5 != 100
|| w6 != 100
|| w7 != 100)
__builtin_abort ();
}
/* Test worker-single/vector-single mode: a plain gang loop in a region that
requests 8 workers and a vector length of 32; every element must be
incremented exactly once. */
void t21()
{
int arr[32], i;
for (i = 0; i < 32; i++)
arr[i] = 0;
#pragma acc parallel copy(arr) \
num_gangs(8) num_workers(8) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 32; j++)
arr[j]++;
}
for (i = 0; i < 32; i++)
assert (arr[i] == 1);
}
/* Test worker-single/vector-single mode with an atomic update: same as t21,
except that the increment is wrapped in "#pragma acc atomic". */
void t22()
{
int arr[32], i;
for (i = 0; i < 32; i++)
arr[i] = 0;
#pragma acc parallel copy(arr) \
num_gangs(8) num_workers(8) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
#pragma acc atomic
arr[j]++;
}
}
for (i = 0; i < 32; i++)
assert (arr[i] == 1);
}
/* Test condition in worker-single/vector-single mode.  arr[i] starts out
   as i; odd values are incremented by 1, even values by 2.  */
void t23()
{
  int arr[32], i;

  for (i = 0; i < 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) \
              num_gangs(8) num_workers(8) vector_length(32)
  {
    int j;
    #pragma acc loop gang
    for (j = 0; j < 32; j++)
      if ((arr[j] % 2) != 0)
        arr[j]++;
      else
        arr[j] += 2;
  }

  /* The conditional expression must be parenthesised as a whole: "=="
     binds tighter than "?:", so without the extra parentheses the
     assertion would evaluate to i + 1 or i + 2, which is never zero here,
     and could never fail.  */
  for (i = 0; i < 32; i++)
    assert (arr[i] == (((i % 2) != 0) ? i + 1 : i + 2));
}
/* Test switch in worker-single/vector-single mode. arr[i] starts out as i
and is increased by (i % 5) + 1 through a switch on its own value. */
void t24()
{
int arr[32], i;
for (i = 0; i < 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) \
num_gangs(8) num_workers(8) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 32; j++)
switch (arr[j] % 5)
{
case 0: arr[j] += 1; break;
case 1: arr[j] += 2; break;
case 2: arr[j] += 3; break;
case 3: arr[j] += 4; break;
case 4: arr[j] += 5; break;
default: arr[j] += 99;
}
}
for (i = 0; i < 32; i++)
assert (arr[i] == i + (i % 5) + 1);
}
/* Test worker-single/vector-partitioned mode: a gang loop with a nested
vector loop whose body is an atomic increment; every element must end up
one larger than its initial value i. */
void t25()
{
int arr[32 * 32], i;
for (i = 0; i < 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) \
num_gangs(8) num_workers(8) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
int k;
#pragma acc loop vector
for (k = 0; k < 32; k++)
{
#pragma acc atomic
arr[j * 32 + k]++;
}
}
}
for (i = 0; i < 32 * 32; i++)
assert (arr[i] == i + 1);
}
/* Test worker-single, vector-partitioned, gang-redundant mode. There is no
gang loop in the region; the expected totals are scaled by the number of
gangs when the region reports that it ran on a non-host device. */
#define ACTUAL_GANGS 8
void t27()
{
int n, arr[32], i;
int ondev;
for (i = 0; i < 32; i++)
arr[i] = 0;
n = 0;
#pragma acc parallel copy(n, arr) copyout(ondev) \
num_gangs(ACTUAL_GANGS) num_workers(8) vector_length(32)
{
int j;
/* Report back whether the region executed on a non-host device. */
ondev = acc_on_device (acc_device_not_host);
#pragma acc atomic
n++;
#pragma acc loop vector
for (j = 0; j < 32; j++)
{
#pragma acc atomic
arr[j] += 1;
}
#pragma acc atomic
n++;
}
/* Expected multiplier: ACTUAL_GANGS on a device, 1 on the host. */
int m = ondev ? ACTUAL_GANGS : 1;
assert (n == m * 2);
for (i = 0; i < 32; i++)
assert (arr[i] == m);
}
#undef ACTUAL_GANGS
/* Check if worker-single variables get broadcast to vectors. */
#pragma acc routine
float t28_routine ()
{
  /* Fixed value shared by the offloaded computation and the host-side
     check in t28.  */
  const float result = 2.71;

  return result;
}
#define N 32
/* VAL is computed once before the vector loop and V1 comes from the host;
both are then read by every iteration of the vector loop.  The host
recomputes the same expression and compares with a tolerance of 0.0001.
THREADS is not named in any data clause on the parallel directive.  */
void t28()
{
float threads[N], v1 = 3.14;
/* Poison the result array so that untouched elements fail the check.  */
for (int i = 0; i < N; i++)
threads[i] = -1;
#pragma acc parallel num_gangs (1) vector_length (32) copy (v1)
{
float val = t28_routine ();
#pragma acc loop vector
for (int i = 0; i < N; i++)
threads[i] = val + v1*i;
}
for (int i = 0; i < N; i++)
assert (fabs (threads[i] - (t28_routine () + v1*i)) < 0.0001);
}
#undef N
/* Run the test functions in order.  The numbering is not contiguous: no
call to t3, t6, t14, t15 or t26 appears here.  Each test reports failure
through its own checks, so reaching the end means success.  */
int main()
{
t1();
t2();
t4();
t5();
t7();
t8();
t9();
t10();
t11();
t12();
t13();
t16();
t17();
t18();
t19();
t20();
t21();
t22();
t23();
t24();
t25();
t27();
t28();
return 0;
}

View File

@ -0,0 +1,953 @@
#include <assert.h>
#include <openacc.h>
/* Small two-member aggregate, used by the tests below as a worker-private
variable of struct type.  */
typedef struct {
int x, y;
} vec2;
/* Larger aggregate with an embedded array, used by loop_g_6 as a
gang-private variable.  */
typedef struct {
int x, y, z;
int attr[13];
} vec3_attr;
/* Test of gang-private variables declared in local scope with parallel
directive.  X is declared inside the parallel region, written by the first
gang loop and read by the second.  Each element of ARR starts at 3 and
must end up as 3 + i * 2.  */
void local_g_1()
{
int i, arr[32];
for (i = 0; i < 32; i++)
arr[i] = 3;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
{
int x;
/* NOTE(review): presumably gang(static:1) with 32 gangs gives iteration i
to the same gang in both loops, so X still holds i * 2 in the second loop
-- confirm against the OpenACC specification.  */
#pragma acc loop gang(static:1)
for (i = 0; i < 32; i++)
x = i * 2;
#pragma acc loop gang(static:1)
for (i = 0; i < 32; i++)
{
/* When running on the host, X is recomputed here rather than relying on
the value left behind by the first loop.  */
if (acc_on_device (acc_device_host))
x = i * 2;
arr[i] += x;
}
}
for (i = 0; i < 32; i++)
assert (arr[i] == 3 + i * 2);
}
/* Test of worker-private variables declared in a local scope, broadcasting
to vector-partitioned mode. Back-to-back worker loops.  X is declared in
the body of each worker loop and read inside the nested vector loop.  Note
that "i ^ j * 3" parses as "i ^ (j * 3)" and "i | j * 5" as "i | (j * 5)".  */
void local_w_1()
{
int i, arr[32 * 32 * 32];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
/* First worker loop: adds (i ^ j * 3) * k to each element.  */
#pragma acc loop worker
for (j = 0; j < 32; j++)
{
int k;
int x = i ^ j * 3;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += x * k;
}
/* Second worker loop over the same elements: adds (i | j * 5) * k.  */
#pragma acc loop worker
for (j = 0; j < 32; j++)
{
int k;
int x = i | j * 5;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += x * k;
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of worker-private variables declared in a local scope, broadcasting
to vector-partitioned mode. Successive vector loops.  A single X, declared
in the worker loop body, is reassigned between two vector loops that both
read it.  */
void local_w_2()
{
int i, arr[32 * 32 * 32];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
#pragma acc loop worker
for (j = 0; j < 32; j++)
{
int k;
int x = i ^ j * 3;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += x * k;
/* Update X between the two vector loops; the second loop must observe
the new value.  */
x = i | j * 5;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += x * k;
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of worker-private variables declared in a local scope, broadcasting
to vector-partitioned mode. Aggregate worker variable.  Both members of the
vec2 PT are set before the vector loops; the first loop reads PT.x and the
second reads PT.y.  */
void local_w_3()
{
int i, arr[32 * 32 * 32];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
#pragma acc loop worker
for (j = 0; j < 32; j++)
{
int k;
vec2 pt;
pt.x = i ^ j * 3;
pt.y = i | j * 5;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += pt.x * k;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += pt.y * k;
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of worker-private variables declared in a local scope, broadcasting
to vector-partitioned mode. Addressable worker variable.  PT has its
address taken; it is accessed both directly and through the pointer PTP,
with the write and the read of each member using opposite access paths.  */
void local_w_4()
{
int i, arr[32 * 32 * 32];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
#pragma acc loop worker
for (j = 0; j < 32; j++)
{
int k;
vec2 pt, *ptp;
ptp = &pt;
/* Written directly, read through the pointer in the vector loop.  */
pt.x = i ^ j * 3;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += ptp->x * k;
/* Written through the pointer, read directly in the vector loop.  */
ptp->y = i | j * 5;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += pt.y * k;
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of worker-private variables declared in a local scope, broadcasting
to vector-partitioned mode. Array worker variable.  PT is a two-element
array declared in the worker loop body; each element is assigned just
before the vector loop that reads it.  */
void local_w_5()
{
int i, arr[32 * 32 * 32];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
#pragma acc loop worker
for (j = 0; j < 32; j++)
{
int k;
int pt[2];
pt[0] = i ^ j * 3;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += pt[0] * k;
pt[1] = i | j * 5;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += pt[1] * k;
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of gang-private variables declared on loop directive.  X is named
in a "private" clause on the gang loop and assigned inside the loop body
before it is read.  ARR[i] starts as i and gains i * 2, hence the expected
value i * 3.  */
void loop_g_1()
{
int x = 5, i, arr[32];
for (i = 0; i < 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
{
#pragma acc loop gang private(x)
for (i = 0; i < 32; i++)
{
x = i * 2;
arr[i] += x;
}
}
for (i = 0; i < 32; i++)
assert (arr[i] == i * 3);
}
/* Test of gang-private variables declared on loop directive, with broadcasting
to partitioned workers.  X is assigned once per gang-loop iteration and then
read by every iteration of the nested worker loop.  Row i of the 32x32
array gains i * 2; in the flat check below the row is i / 32.  */
void loop_g_2()
{
int x = 5, i, arr[32 * 32];
for (i = 0; i < 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
{
#pragma acc loop gang private(x)
for (i = 0; i < 32; i++)
{
x = i * 2;
#pragma acc loop worker
for (int j = 0; j < 32; j++)
arr[i * 32 + j] += x;
}
}
for (i = 0; i < 32 * 32; i++)
assert (arr[i] == i + (i / 32) * 2);
}
/* Test of gang-private variables declared on loop directive, with broadcasting
to partitioned vectors.  Same shape as loop_g_2, but the inner loop is a
vector loop instead of a worker loop.  Row i of the 32x32 array gains
i * 2; in the flat check below the row is i / 32.  */
void loop_g_3()
{
int x = 5, i, arr[32 * 32];
for (i = 0; i < 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
{
#pragma acc loop gang private(x)
for (i = 0; i < 32; i++)
{
x = i * 2;
#pragma acc loop vector
for (int j = 0; j < 32; j++)
arr[i * 32 + j] += x;
}
}
for (i = 0; i < 32 * 32; i++)
assert (arr[i] == i + (i / 32) * 2);
}
/* Test of gang-private addressable variable declared on loop directive, with
broadcasting to partitioned workers.  Same as loop_g_2 except that the
address of X is taken inside the gang loop.  */
void loop_g_4()
{
int x = 5, i, arr[32 * 32];
for (i = 0; i < 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
{
#pragma acc loop gang private(x)
for (i = 0; i < 32; i++)
{
/* Taking the address makes X addressable.  */
int *p = &x;
x = i * 2;
#pragma acc loop worker
for (int j = 0; j < 32; j++)
arr[i * 32 + j] += x;
/* Modify X through the pointer after the worker loop.  The result is
not checked: X is reassigned at the top of the next iteration and only ARR
is verified below.  */
(*p)--;
}
}
for (i = 0; i < 32 * 32; i++)
assert (arr[i] == i + (i / 32) * 2);
}
/* Test of gang-private array variable declared on loop directive, with
broadcasting to partitioned workers.  The eight-element array X is filled
with j * 2 in each gang-loop iteration and then indexed with j % 8 inside
the worker loop.  */
void loop_g_5()
{
int x[8], i, arr[32 * 32];
for (i = 0; i < 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
{
#pragma acc loop gang private(x)
for (i = 0; i < 32; i++)
{
for (int j = 0; j < 8; j++)
x[j] = j * 2;
#pragma acc loop worker
for (int j = 0; j < 32; j++)
arr[i * 32 + j] += x[j % 8];
}
}
/* Because 32 is a multiple of 8, (row * 32 + j) % 8 equals j % 8, so the
flat index can be used directly in the expected value.  */
for (i = 0; i < 32 * 32; i++)
assert (arr[i] == i + (i % 8) * 2);
}
/* Test of gang-private aggregate variable declared on loop directive, with
broadcasting to partitioned workers.  PT is a vec3_attr; three scalar
members and one element of its embedded array are set per gang-loop
iteration and summed inside the worker loop.  */
void loop_g_6()
{
int i, arr[32 * 32];
vec3_attr pt;
for (i = 0; i < 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
{
#pragma acc loop gang private(pt)
for (i = 0; i < 32; i++)
{
pt.x = i;
pt.y = i * 2;
pt.z = i * 4;
pt.attr[5] = i * 6;
#pragma acc loop worker
for (int j = 0; j < 32; j++)
arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5];
}
}
/* The four contributions add up to (1 + 2 + 4 + 6) * row = 13 * row, where
the row of flat index i is i / 32.  */
for (i = 0; i < 32 * 32; i++)
assert (arr[i] == i + (i / 32) * 13);
}
/* Test of vector-private variables declared on loop directive.  X is named
in a "private" clause on each of two successive vector loops and is
assigned inside each loop body before it is read.  */
void loop_v_1()
{
int x, i, arr[32 * 32 * 32];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
#pragma acc loop worker
for (j = 0; j < 32; j++)
{
int k;
#pragma acc loop vector private(x)
for (k = 0; k < 32; k++)
{
x = i ^ j * 3;
arr[i * 1024 + j * 32 + k] += x * k;
}
#pragma acc loop vector private(x)
for (k = 0; k < 32; k++)
{
x = i | j * 5;
arr[i * 1024 + j * 32 + k] += x * k;
}
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of vector-private variables declared on loop directive. Array type.
The two-element array PT is named in a "private" clause on a single vector
loop; both elements are assigned and then read within the same iteration.  */
void loop_v_2()
{
int pt[2], i, arr[32 * 32 * 32];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
#pragma acc loop worker
for (j = 0; j < 32; j++)
{
int k;
#pragma acc loop vector private(pt)
for (k = 0; k < 32; k++)
{
pt[0] = i ^ j * 3;
pt[1] = i | j * 5;
arr[i * 1024 + j * 32 + k] += pt[0] * k;
arr[i * 1024 + j * 32 + k] += pt[1] * k;
}
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of worker-private variables declared on a loop directive.  X is
named in a "private" clause on the worker loop, which has no vector loop
nested inside it.  */
void loop_w_1()
{
int x = 5, i, arr[32 * 32];
for (i = 0; i < 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
#pragma acc loop worker private(x)
for (j = 0; j < 32; j++)
{
x = i ^ j * 3;
/* Try to ensure that accesses to 'x' don't get optimized into a
temporary. */
__asm__ __volatile__ ("");
arr[i * 32 + j] += x;
}
}
}
/* For flat index i the gang index is i / 32 and the worker index is
i % 32.  */
for (i = 0; i < 32 * 32; i++)
assert (arr[i] == i + ((i / 32) ^ (i % 32) * 3));
}
/* Test of worker-private variables declared on a loop directive, broadcasting
to vector-partitioned mode.  X is private to the worker loop, assigned in
its body and read by every iteration of the nested vector loop.  */
void loop_w_2()
{
int x = 5, i, arr[32 * 32 * 32];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
#pragma acc loop worker private(x)
for (j = 0; j < 32; j++)
{
int k;
x = i ^ j * 3;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += x * k;
}
}
}
/* Each element must hold its initial value plus the single contribution.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k);
}
}
/* Test of worker-private variables declared on a loop directive, broadcasting
to vector-partitioned mode. Back-to-back worker loops.  The same X is named
in the "private" clause of two consecutive worker loops inside one gang
loop.  */
void loop_w_3()
{
int x = 5, i, arr[32 * 32 * 32];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
/* First worker loop: adds (i ^ j * 3) * k to each element.  */
#pragma acc loop worker private(x)
for (j = 0; j < 32; j++)
{
int k;
x = i ^ j * 3;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += x * k;
}
/* Second worker loop over the same elements: adds (i | j * 5) * k.  */
#pragma acc loop worker private(x)
for (j = 0; j < 32; j++)
{
int k;
x = i | j * 5;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += x * k;
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of worker-private variables declared on a loop directive, broadcasting
to vector-partitioned mode. Successive vector loops.  X is private to the
worker loop and is reassigned between two vector loops that both read it.  */
void loop_w_4()
{
int x = 5, i, arr[32 * 32 * 32];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
#pragma acc loop worker private(x)
for (j = 0; j < 32; j++)
{
int k;
x = i ^ j * 3;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += x * k;
/* Update X between the two vector loops; the second loop must observe
the new value.  */
x = i | j * 5;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += x * k;
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of worker-private variables declared on a loop directive, broadcasting
to vector-partitioned mode. Addressable worker variable.  Same as loop_w_4
except that the second value is stored into X through a pointer to it.  */
void loop_w_5()
{
int x = 5, i, arr[32 * 32 * 32];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
#pragma acc loop worker private(x)
for (j = 0; j < 32; j++)
{
int k;
/* Taking the address makes X addressable.  */
int *p = &x;
x = i ^ j * 3;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += x * k;
/* Store through the pointer; the following vector loop reads X
directly and must observe the new value.  */
*p = i | j * 5;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += x * k;
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of worker-private variables declared on a loop directive, broadcasting
to vector-partitioned mode. Aggregate worker variable.  The vec2 PT is
declared at function scope and named in the "private" clause of the worker
loop; the first vector loop reads PT.x and the second reads PT.y.  */
void loop_w_6()
{
int i, arr[32 * 32 * 32];
vec2 pt;
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
#pragma acc loop worker private(pt)
for (j = 0; j < 32; j++)
{
int k;
pt.x = i ^ j * 3;
pt.y = i | j * 5;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += pt.x * k;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += pt.y * k;
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of worker-private variables declared on loop directive, broadcasting
to vector-partitioned mode. Array worker variable.  The two-element array
PT is declared at function scope; each element is assigned just before the
vector loop that reads it.  */
void loop_w_7()
{
int i, arr[32 * 32 * 32];
int pt[2];
for (i = 0; i < 32 * 32 * 32; i++)
arr[i] = i;
/* "pt" is treated as "present_or_copy" on the parallel directive because it
is an array variable. */
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
{
int j;
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
/* But here, it is made private per-worker. */
#pragma acc loop worker private(pt)
for (j = 0; j < 32; j++)
{
int k;
pt[0] = i ^ j * 3;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += pt[0] * k;
pt[1] = i | j * 5;
#pragma acc loop vector
for (k = 0; k < 32; k++)
arr[i * 1024 + j * 32 + k] += pt[1] * k;
}
}
}
/* Each element must hold its initial value plus both contributions.  */
for (i = 0; i < 32; i++)
for (int j = 0; j < 32; j++)
for (int k = 0; k < 32; k++)
{
int idx = i * 1024 + j * 32 + k;
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
}
}
/* Test of gang-private variables declared on the parallel directive.  X is
named in a "private" clause on the parallel construct, written by the first
gang loop and read by the second.  Each element of ARR starts at 3 and
must end up as 3 + i * 2.  */
void parallel_g_1()
{
int x = 5, i, arr[32];
for (i = 0; i < 32; i++)
arr[i] = 3;
#pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(8) vector_length(32)
{
/* NOTE(review): presumably gang(static:1) with 32 gangs gives iteration i
to the same gang in both loops, so X still holds i * 2 in the second loop
-- confirm against the OpenACC specification.  */
#pragma acc loop gang(static:1)
for (i = 0; i < 32; i++)
x = i * 2;
#pragma acc loop gang(static:1)
for (i = 0; i < 32; i++)
{
/* When running on the host, X is recomputed here rather than relying on
the value left behind by the first loop.  */
if (acc_on_device (acc_device_host))
x = i * 2;
arr[i] += x;
}
}
for (i = 0; i < 32; i++)
assert (arr[i] == 3 + i * 2);
}
/* Test of gang-private array variable declared on the parallel directive.
The 32-element array X is filled with j * 2 in each gang-loop iteration and
then read in reverse order (X[31 - j]) inside the worker loop.  */
void parallel_g_2()
{
int x[32], i, arr[32 * 32];
for (i = 0; i < 32 * 32; i++)
arr[i] = i;
#pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(2) vector_length(32)
{
#pragma acc loop gang
for (i = 0; i < 32; i++)
{
int j;
for (j = 0; j < 32; j++)
x[j] = j * 2;
#pragma acc loop worker
for (j = 0; j < 32; j++)
arr[i * 32 + j] += x[31 - j];
}
}
/* For flat index i the column is i % 32, and X[31 - column] holds
(31 - column) * 2.  */
for (i = 0; i < 32 * 32; i++)
assert (arr[i] == i + (31 - (i % 32)) * 2);
}
int main ()
{
local_g_1();
local_w_1();
local_w_2();
local_w_3();
local_w_4();
local_w_5();
loop_g_1();
loop_g_2();
loop_g_3();
loop_g_4();
loop_g_5();
loop_g_6();
loop_v_1();
loop_v_2();
loop_w_1();
loop_w_2();
loop_w_3();
loop_w_4();
loop_w_5();
loop_w_6();
loop_w_7();
parallel_g_1();
parallel_g_2();
return 0;
}

View File

@ -0,0 +1,129 @@
/* Tests of reduction on loop directive. */
#include <assert.h>
/* Test of reduction on loop directive (gangs, non-private reduction
   variable).  Runs a '+' reduction and a '*' reduction on the device and
   compares each against the same computation done on the host.  */
void g_np_1()
{
  int i, arr[1024], res = 0, hres = 0;

  for (i = 0; i < 1024; i++)
    arr[i] = i;

  /* Sum reduction over the whole array.  */
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
  copy(res)
  {
#pragma acc loop gang reduction(+:res)
    for (i = 0; i < 1024; i++)
      res += arr[i];
  }

  for (i = 0; i < 1024; i++)
    hres += arr[i];

  assert (res == hres);

  res = hres = 1;

  /* Product reduction.  Start at index 1: arr[0] is 0, and including it
     would force both products to 0, so the comparison could not detect a
     wrong reduction.  The product of arr[1..11] is 11!, which fits in a
     32-bit int.  */
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
  copy(res)
  {
#pragma acc loop gang reduction(*:res)
    for (i = 1; i < 12; i++)
      res *= arr[i];
  }

  for (i = 1; i < 12; i++)
    hres *= arr[i];

  assert (res == hres);
}
/* Test of reduction on loop directive (gangs and vectors, non-private
reduction variable).  The device sum of ARR[0..1023] is compared against
the same sum computed on the host.  */
void gv_np_1()
{
int i, arr[1024], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(res)
{
#pragma acc loop gang vector reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[i];
}
/* Host reference result.  */
for (i = 0; i < 1024; i++)
hres += arr[i];
assert (res == hres);
}
/* Test of reduction on loop directive (gangs and workers, non-private
reduction variable).  The device sum of ARR[0..1023] is compared against
the same sum computed on the host.  */
void gw_np_1()
{
int i, arr[1024], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(res)
{
#pragma acc loop gang worker reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[i];
}
/* Host reference result.  */
for (i = 0; i < 1024; i++)
hres += arr[i];
assert (res == hres);
}
/* Test of reduction on loop directive (gangs, workers and vectors, non-private
reduction variable).  The device sum of ARR[0..1023] is compared against
the same sum computed on the host.  */
void gwv_np_1()
{
int i, arr[1024], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(res)
{
#pragma acc loop gang worker vector reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[i];
}
/* Host reference result.  */
for (i = 0; i < 1024; i++)
hres += arr[i];
assert (res == hres);
}
int main()
{
g_np_1();
gv_np_1();
gw_np_1();
gwv_np_1();
return 0;
}

View File

@ -0,0 +1,88 @@
// { dg-additional-options "-fno-exceptions" }
#include <stdio.h>
#include <stdlib.h>
#pragma acc routine
int fact(int n)
{
  /* Recursive factorial: the recursion stops at 0 and at 1, both of which
     yield 1.  */
  return (n == 0 || n == 1) ? 1 : n * fact (n - 1);
}
/* Compute fact (0) .. fact (n - 1) in eight asynchronous "parallel loop"
regions, one per combination of loop partitioning clauses (seq, gang,
worker, vector, and the combinations of the last three).  The host then
checks the "seq" result against fact () and every other result against the
"seq" result.  */
int main()
{
int *s, *g, *w, *v, *gw, *gv, *wv, *gwv, i, n = 10;
/* One output buffer per variant; the names abbreviate the clauses used.  */
s = (int *) malloc (sizeof (int) * n);
g = (int *) malloc (sizeof (int) * n);
w = (int *) malloc (sizeof (int) * n);
v = (int *) malloc (sizeof (int) * n);
gw = (int *) malloc (sizeof (int) * n);
gv = (int *) malloc (sizeof (int) * n);
wv = (int *) malloc (sizeof (int) * n);
gwv = (int *) malloc (sizeof (int) * n);
#pragma acc parallel loop async copyout(s[0:n]) seq
for (i = 0; i < n; i++)
s[i] = fact (i);
#pragma acc parallel loop async copyout(g[0:n]) gang
for (i = 0; i < n; i++)
g[i] = fact (i);
#pragma acc parallel loop async copyout(w[0:n]) worker
for (i = 0; i < n; i++)
w[i] = fact (i);
#pragma acc parallel loop async copyout(v[0:n]) vector
for (i = 0; i < n; i++)
v[i] = fact (i);
#pragma acc parallel loop async copyout(gw[0:n]) gang worker
for (i = 0; i < n; i++)
gw[i] = fact (i);
#pragma acc parallel loop async copyout(gv[0:n]) gang vector
for (i = 0; i < n; i++)
gv[i] = fact (i);
#pragma acc parallel loop async copyout(wv[0:n]) worker vector
for (i = 0; i < n; i++)
wv[i] = fact (i);
#pragma acc parallel loop async copyout(gwv[0:n]) gang worker vector
for (i = 0; i < n; i++)
gwv[i] = fact (i);
/* All regions above were launched with "async"; wait for them before
reading any of the output buffers.  */
#pragma acc wait
/* The sequential result is validated against fact () called on the host.  */
for (i = 0; i < n; i++)
if (s[i] != fact (i))
abort ();
/* Every partitioned variant must match the sequential result.  */
for (i = 0; i < n; i++)
if (g[i] != s[i])
abort ();
for (i = 0; i < n; i++)
if (w[i] != s[i])
abort ();
for (i = 0; i < n; i++)
if (v[i] != s[i])
abort ();
for (i = 0; i < n; i++)
if (gw[i] != s[i])
abort ();
for (i = 0; i < n; i++)
if (gv[i] != s[i])
abort ();
for (i = 0; i < n; i++)
if (wv[i] != s[i])
abort ();
for (i = 0; i < n; i++)
if (gwv[i] != s[i])
abort ();
/* The buffers are not freed; the process exits right away.  */
return 0;
}

View File

@ -0,0 +1,123 @@
#include <stdlib.h>
#include <stdio.h>
#define M 8
#define N 32
/* Routine with vector parallelism: subtracts each of the first N elements
of A from itself, i.e. sets them to zero, using a vector loop.  */
#pragma acc routine vector
void
vector (int *a)
{
int i;
#pragma acc loop vector
for (i = 0; i < N; i++)
a[i] -= a[i];
}
/* Routine with worker parallelism: adds each element of B to itself, i.e.
doubles it.  B is indexed as N rows of M elements, with a worker loop over
the rows and a vector loop within each row.  */
#pragma acc routine worker
void
worker (int *b)
{
int i, j;
#pragma acc loop worker
for (i = 0; i < N; i++)
{
#pragma acc loop vector
for (j = 0; j < M; j++)
b[i * M + j] += b[i * M + j];
}
}
/* Routine with gang parallelism: subtracts its own index from each of the
first N elements of A, using a single loop partitioned across gangs,
workers and vectors.  */
#pragma acc routine gang
void
gang (int *a)
{
int i;
#pragma acc loop gang worker vector
for (i = 0; i < N; i++)
a[i] -= i;
}
#pragma acc routine seq
void
seq (int *a)
{
  /* Sequential routine: add one to each of the first N elements of A.  */
  int *elt;

  for (elt = a; elt != a + N; elt++)
    *elt += 1;
}
/* Exercise the seq, gang, worker and vector routines defined above from
   parallel regions, and verify each result on the host.  */
int
main(int argc, char **argv)
{
  int i;
  int a[N];
  int b[M * N];

  /* seq: called N times, each call adds one to every element.  */
  for (i = 0; i < N; i++)
    a[i] = 0;

#pragma acc parallel copy (a[0:N])
  {
#pragma acc loop seq
    for (i = 0; i < N; i++)
      seq (&a[0]);
  }

  for (i = 0; i < N; i++)
    {
      if (a[i] != N)
	abort ();
    }

  /* gang: called N times on the result above, each call subtracts the
     element's index.  */
#pragma acc parallel copy (a[0:N])
  {
#pragma acc loop seq
    for (i = 0; i < N; i++)
      gang (&a[0]);
  }

  for (i = 0; i < N; i++)
    {
      if (a[i] != N + (N * (-1 * i)))
	abort ();
    }

  /* worker: B must be initialized before the region reads it, and it is
     B -- the array the routine actually modifies -- that has to be
     verified.  One call doubles every element.  Like the checks above,
     this expects the region body to take effect exactly once.  */
  for (i = 0; i < M * N; i++)
    b[i] = i;

#pragma acc parallel copy (b[0:M*N])
  {
    worker (&b[0]);
  }

  for (i = 0; i < M * N; i++)
    {
      if (b[i] != 2 * i)
	abort ();
    }

  /* vector: every call sets all elements to zero.  */
  for (i = 0; i < N; i++)
    a[i] = i;

#pragma acc parallel copy (a[0:N])
  {
#pragma acc loop
    for (i = 0; i < N; i++)
      vector (&a[0]);
  }

  for (i = 0; i < N; i++)
    {
      if (a[i] != 0)
	abort ();
    }

  return 0;
}

View File

@ -0,0 +1,76 @@
/* This code uses nvptx inline assembly guarded with acc_on_device, which is
not optimized away at -O0, and then confuses the target assembler.
{ dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
#include <stdio.h>
#include <openacc.h>
#define NUM_WORKERS 16
#define NUM_VECTORS 32
#define WIDTH 64
#define HEIGHT 32
/* WORK_ID (I, N): when acc_on_device reports an nvidia device, read the
%tid.y register through inline assembly; otherwise evaluate to I % N.  */
#define WORK_ID(I,N) \
(acc_on_device (acc_device_nvidia) \
? ({unsigned __r; \
__asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (__r)); \
__r; }) : (I % N))
/* VEC_ID (I, N): as WORK_ID, but reads the %tid.x register.  */
#define VEC_ID(I,N) \
(acc_on_device (acc_device_nvidia) \
? ({unsigned __r; \
__asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (__r)); \
__r; }) : (I % N))
/* Worker routine: for each element of the H x W array at PTR, store the
value of WORK_ID (row, NW) shifted left by 8 bits, OR-ed with
VEC_ID (column, NV).  Rows are distributed by a worker loop and columns by
a vector loop.  */
#pragma acc routine worker
void __attribute__ ((noinline))
WorkVec (int *ptr, int w, int h, int nw, int nv)
{
#pragma acc loop worker
for (int i = 0; i < h; i++)
#pragma acc loop vector
for (int j = 0; j < w; j++)
ptr[i*w + j] = (WORK_ID (i, nw) << 8) | VEC_ID(j, nv);
}
/* Run WorkVec in a parallel region with NW workers and NUM_VECTORS vector
lanes, then check every element of the result.  Prints progress and any
mismatches to stdout.  Returns 1 if any element was wrong, 0 otherwise.  */
int DoWorkVec (int nw)
{
int ary[HEIGHT][WIDTH];
int err = 0;
/* Poison the array so that elements left unwritten are detected.  */
for (int ix = 0; ix != HEIGHT; ix++)
for (int jx = 0; jx != WIDTH; jx++)
ary[ix][jx] = 0xdeadbeef;
printf ("spawning %d ...", nw); fflush (stdout);
#pragma acc parallel num_workers(nw) vector_length (NUM_VECTORS) copy (ary)
{
WorkVec ((int *)ary, WIDTH, HEIGHT, nw, NUM_VECTORS);
}
for (int ix = 0; ix != HEIGHT; ix++)
for (int jx = 0; jx != WIDTH; jx++)
{
/* Row IX is expected to carry id IX % NW in bits 8 and up, and column JX
id JX % NUM_VECTORS in the low bits.  */
int exp = ((ix % nw) << 8) | (jx % NUM_VECTORS);
if (ary[ix][jx] != exp)
{
printf ("\nary[%d][%d] = %#x expected %#x", ix, jx,
ary[ix][jx], exp);
err = 1;
}
}
printf (err ? " failed\n" : " ok\n");
return err;
}
int main ()
{
  int failed = 0;
  int workers = 1;

  /* Try every power-of-two worker count from 1 up to NUM_WORKERS and
     accumulate the failure flags, so that all configurations run even
     after one fails.  */
  while (workers <= NUM_WORKERS)
    {
      failed |= DoWorkVec (workers);
      workers <<= 1;
    }

  return failed;
}

View File

@ -1,361 +0,0 @@
/* Copy of update-1.c with self exchanged with host for #pragma acc update. */
/* { dg-do run } */
/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
#include <openacc.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
/* Exercise "#pragma acc update device" and "update self"/"update host" on
host arrays that have been mapped to explicitly allocated device memory.
Each phase sets host values, pushes some or all of them to the device, runs
a parallel region, pulls data back and checks the host copies.  */
int
main (int argc, char **argv)
{
int N = 8;
int NDIV2 = N / 2;
float *a, *b, *c;
float *d_a, *d_b, *d_c;
int i;
a = (float *) malloc (N * sizeof (float));
b = (float *) malloc (N * sizeof (float));
c = (float *) malloc (N * sizeof (float));
d_a = (float *) acc_malloc (N * sizeof (float));
d_b = (float *) acc_malloc (N * sizeof (float));
d_c = (float *) acc_malloc (N * sizeof (float));
for (i = 0; i < N; i++)
{
a[i] = 3.0;
b[i] = 0.0;
}
/* Associate each host array with its device allocation.  C and D_C are
mapped but never used afterwards.  */
acc_map_data (a, d_a, N * sizeof (float));
acc_map_data (b, d_b, N * sizeof (float));
acc_map_data (c, d_c, N * sizeof (float));
/* Phase 1: push A and B, copy A into B on the device, pull both back.  */
#pragma acc update device (a[0:N], b[0:N])
#pragma acc parallel present (a[0:N], b[0:N])
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
}
#pragma acc update self (a[0:N], b[0:N])
for (i = 0; i < N; i++)
{
if (a[i] != 3.0)
abort ();
if (b[i] != 3.0)
abort ();
}
/* The update directives must leave the mappings in place.  */
if (!acc_is_present (&a[0], (N * sizeof (float))))
abort ();
if (!acc_is_present (&b[0], (N * sizeof (float))))
abort ();
/* Phase 2: same round trip with new host values.  */
for (i = 0; i < N; i++)
{
a[i] = 5.0;
b[i] = 1.0;
}
#pragma acc update device (a[0:N], b[0:N])
#pragma acc parallel present (a[0:N], b[0:N])
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
}
#pragma acc update self (a[0:N], b[0:N])
for (i = 0; i < N; i++)
{
if (a[i] != 5.0)
abort ();
if (b[i] != 5.0)
abort ();
}
if (!acc_is_present (&a[0], (N * sizeof (float))))
abort ();
if (!acc_is_present (&b[0], (N * sizeof (float))))
abort ();
/* Phase 3: same as phase 2, but pulling back with "update host" instead of
"update self".  */
for (i = 0; i < N; i++)
{
a[i] = 5.0;
b[i] = 1.0;
}
#pragma acc update device (a[0:N], b[0:N])
#pragma acc parallel present (a[0:N], b[0:N])
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
}
#pragma acc update host (a[0:N], b[0:N])
for (i = 0; i < N; i++)
{
if (a[i] != 5.0)
abort ();
if (b[i] != 5.0)
abort ();
}
if (!acc_is_present (&a[0], (N * sizeof (float))))
abort ();
if (!acc_is_present (&b[0], (N * sizeof (float))))
abort ();
/* Phase 4: push 6.0, then change the host copy of A to 9.0 without
pushing it.  The device must still see 6.0, and pulling back must
overwrite the host's 9.0.  */
for (i = 0; i < N; i++)
{
a[i] = 6.0;
b[i] = 0.0;
}
#pragma acc update device (a[0:N], b[0:N])
for (i = 0; i < N; i++)
{
a[i] = 9.0;
}
#pragma acc parallel present (a[0:N], b[0:N])
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
}
#pragma acc update self (a[0:N], b[0:N])
for (i = 0; i < N; i++)
{
if (a[i] != 6.0)
abort ();
if (b[i] != 6.0)
abort ();
}
if (!acc_is_present (&a[0], (N * sizeof (float))))
abort ();
if (!acc_is_present (&b[0], (N * sizeof (float))))
abort ();
/* Phase 5: same pattern as phase 4 with 7.0 on the device.  */
for (i = 0; i < N; i++)
{
a[i] = 7.0;
b[i] = 2.0;
}
#pragma acc update device (a[0:N], b[0:N])
for (i = 0; i < N; i++)
{
a[i] = 9.0;
}
#pragma acc parallel present (a[0:N], b[0:N])
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
}
#pragma acc update self (a[0:N], b[0:N])
for (i = 0; i < N; i++)
{
if (a[i] != 7.0)
abort ();
if (b[i] != 7.0)
abort ();
}
/* Phase 6: push only A (now 9.0); B on the device is overwritten by the
region, so both come back as 9.0.  */
for (i = 0; i < N; i++)
{
a[i] = 9.0;
}
#pragma acc update device (a[0:N])
#pragma acc parallel present (a[0:N], b[0:N])
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
}
#pragma acc update self (a[0:N], b[0:N])
for (i = 0; i < N; i++)
{
if (a[i] != 9.0)
abort ();
if (b[i] != 9.0)
abort ();
}
if (!acc_is_present (&a[0], (N * sizeof (float))))
abort ();
if (!acc_is_present (&b[0], (N * sizeof (float))))
abort ();
/* Phase 7: partial push.  The whole of A is pushed as 5.0, then only the
first half is pushed again as 6.0, so the device holds 6.0 in the first
half and 5.0 in the second.  */
for (i = 0; i < N; i++)
{
a[i] = 5.0;
}
#pragma acc update device (a[0:N])
for (i = 0; i < N; i++)
{
a[i] = 6.0;
}
#pragma acc update device (a[0:NDIV2])
#pragma acc parallel present (a[0:N], b[0:N])
{
int ii;
for (ii = 0; ii < N; ii++)
b[ii] = a[ii];
}
#pragma acc update self (a[0:N], b[0:N])
for (i = 0; i < NDIV2; i++)
{
if (a[i] != 6.0)
abort ();
if (b[i] != 6.0)
abort ();
}
for (i = NDIV2; i < N; i++)
{
if (a[i] != 5.0)
abort ();
if (b[i] != 5.0)
abort ();
}
if (!acc_is_present (&a[0], (N * sizeof (float))))
abort ();
if (!acc_is_present (&b[0], (N * sizeof (float))))
abort ();
/* Phase 8: partial pull.  Push zeros to the first half only (the literal 4
equals NDIV2 for N == 8), increment everything on the device, then pull
back only the second half.  The host's first half keeps its 0.0 and the
second half receives 5.0 + 1.0.  */
for (i = 0; i < N; i++)
{
a[i] = 0.0;
}
#pragma acc update device (a[0:4])
#pragma acc parallel present (a[0:N])
{
int ii;
for (ii = 0; ii < N; ii++)
a[ii] = a[ii] + 1.0;
}
#pragma acc update self (a[4:4])
for (i = 0; i < NDIV2; i++)
{
if (a[i] != 0.0)
abort ();
}
for (i = NDIV2; i < N; i++)
{
if (a[i] != 6.0)
abort ();
}
/* Phase 9: now pull the first half as well; it holds 0.0 + 1.0.  */
#pragma acc update self (a[0:4])
for (i = 0; i < NDIV2; i++)
{
if (a[i] != 1.0)
abort ();
}
for (i = NDIV2; i < N; i++)
{
if (a[i] != 6.0)
abort ();
}
/* Phase 10: push and pull a section in the middle of the array.  Elements
2..5 are pushed as 9 and come back as 10 after the increment.  Elements
0..1 and 6..7 are incremented on the device but not pulled back, so the
host keeps its earlier values.  */
a[2] = 9;
a[3] = 9;
a[4] = 9;
a[5] = 9;
#pragma acc update device (a[2:4])
#pragma acc parallel present (a[0:N])
{
int ii;
for (ii = 0; ii < N; ii++)
a[ii] = a[ii] + 1.0;
}
#pragma acc update self (a[2:4])
for (i = 0; i < 2; i++)
{
if (a[i] != 1.0)
abort ();
}
for (i = 2; i < 6; i++)
{
if (a[i] != 10.0)
abort ();
}
for (i = 6; i < N; i++)
{
if (a[i] != 6.0)
abort ();
}
/* Nothing is unmapped or freed here; the process exits right away.  */
return 0;
}

View File

@ -20,7 +20,7 @@ main (void)
#pragma acc parallel vector_length (32) copyin (a,b) copyout (c)
{
#pragma acc loop /* vector clause is missing, since it's not yet supported. */
#pragma acc loop vector
for (unsigned int i = 0; i < n; i++)
c[i] = a[i] + b[i];
}

View File

@ -1,28 +0,0 @@
#include <assert.h>
/* Test worker-single/vector-single mode. */
/* A gang-partitioned loop with an atomic increment and no worker or vector
loop inside it.  Every element of ARR starts at 0 and is incremented
exactly once, so each must end up as 1.  */
int
main (int argc, char *argv[])
{
int arr[32], i;
for (i = 0; i < 32; i++)
arr[i] = 0;
#pragma acc parallel copy(arr) num_gangs(8) num_workers(8) vector_length(32)
{
int j;
#pragma acc loop gang
for (j = 0; j < 32; j++)
{
#pragma acc atomic
arr[j]++;
}
}
for (i = 0; i < 32; i++)
assert (arr[i] == 1);
return 0;
}

View File

@ -1,28 +0,0 @@
#include <assert.h>
/* Test worker-single/vector-partitioned mode. */
/* A vector-partitioned loop with an atomic increment, run with a single
gang and with no gang or worker loop around it.  ARR[i] starts as i and is
incremented exactly once, so each element must end up as i + 1.  */
int
main (int argc, char *argv[])
{
int arr[32], i;
for (i = 0; i < 32; i++)
arr[i] = i;
#pragma acc parallel copy(arr) num_gangs(1) num_workers(8) vector_length(32)
{
int k;
#pragma acc loop vector
for (k = 0; k < 32; k++)
{
#pragma acc atomic
arr[k]++;
}
}
for (i = 0; i < 32; i++)
assert (arr[i] == i + 1);
return 0;
}

View File

@ -1,46 +0,0 @@
#include <assert.h>
#if defined(ACC_DEVICE_TYPE_host)
#define ACTUAL_GANGS 1
#else
#define ACTUAL_GANGS 8
#endif
/* Test worker-single, vector-partitioned, gang-redundant mode. */
/* Nothing in the parallel region is inside a gang loop.  ACTUAL_GANGS is
used both as the num_gangs argument and as the multiplier in the expected
results: N is incremented twice and each ARR element once per expected
execution of the region body.  */
int
main (int argc, char *argv[])
{
int n, arr[32], i;
for (i = 0; i < 32; i++)
arr[i] = 0;
n = 0;
#pragma acc parallel copy(n, arr) num_gangs(ACTUAL_GANGS) num_workers(8) \
vector_length(32)
{
int j;
#pragma acc atomic
n++;
#pragma acc loop vector
for (j = 0; j < 32; j++)
{
#pragma acc atomic
arr[j] += 1;
}
#pragma acc atomic
n++;
}
assert (n == ACTUAL_GANGS * 2);
for (i = 0; i < 32; i++)
assert (arr[i] == ACTUAL_GANGS);
return 0;
}

View File

@ -132,4 +132,126 @@ program asyncwait
if (d(i) .ne. 1.0) call abort
if (e(i) .ne. 11.0) call abort
end do
a(:) = 3.0
b(:) = 0.0
!$acc data copy (a(1:N)) copy (b(1:N))
!$acc kernels async
!$acc loop
do i = 1, N
b(i) = a(i)
end do
!$acc end kernels
!$acc wait
!$acc end data
do i = 1, N
if (a(i) .ne. 3.0) call abort
if (b(i) .ne. 3.0) call abort
end do
a(:) = 2.0
b(:) = 0.0
!$acc data copy (a(1:N)) copy (b(1:N))
!$acc kernels async (1)
!$acc loop
do i = 1, N
b(i) = a(i)
end do
!$acc end kernels
!$acc wait (1)
!$acc end data
do i = 1, N
if (a(i) .ne. 2.0) call abort
if (b(i) .ne. 2.0) call abort
end do
a(:) = 3.0
b(:) = 0.0
c(:) = 0.0
d(:) = 0.0
!$acc data copy (a(1:N)) copy (b(1:N)) copy (c(1:N)) copy (d(1:N))
!$acc kernels async (1)
do i = 1, N
b(i) = (a(i) * a(i) * a(i)) / a(i)
end do
!$acc end kernels
!$acc kernels async (1)
do i = 1, N
c(i) = (a(i) * 4) / a(i)
end do
!$acc end kernels
!$acc kernels async (1)
!$acc loop
do i = 1, N
d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
end do
!$acc end kernels
!$acc wait (1)
!$acc end data
do i = 1, N
if (a(i) .ne. 3.0) call abort
if (b(i) .ne. 9.0) call abort
if (c(i) .ne. 4.0) call abort
if (d(i) .ne. 1.0) call abort
end do
a(:) = 2.0
b(:) = 0.0
c(:) = 0.0
d(:) = 0.0
e(:) = 0.0
!$acc data copy (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N))
!$acc kernels async (1)
do i = 1, N
b(i) = (a(i) * a(i) * a(i)) / a(i)
end do
!$acc end kernels
!$acc kernels async (1)
!$acc loop
do i = 1, N
c(i) = (a(i) * 4) / a(i)
end do
!$acc end kernels
!$acc kernels async (1)
!$acc loop
do i = 1, N
d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
end do
!$acc end kernels
!$acc kernels wait (1) async (1)
!$acc loop
do i = 1, N
e(i) = a(i) + b(i) + c(i) + d(i)
end do
!$acc end kernels
!$acc wait (1)
!$acc end data
do i = 1, N
if (a(i) .ne. 2.0) call abort
if (b(i) .ne. 4.0) call abort
if (c(i) .ne. 4.0) call abort
if (d(i) .ne. 1.0) call abort
if (e(i) .ne. 11.0) call abort
end do
end program asyncwait

View File

@ -1,6 +1,6 @@
! { dg-do run }
program parallel_wait
program asyncwait
integer, parameter :: N = 64
real, allocatable :: a(:), b(:), c(:)
integer i
@ -30,6 +30,31 @@ program parallel_wait
end do
!$acc end parallel
do i = 1, N
if (c(i) .ne. 2.0) call abort
end do
!$acc kernels async (0)
!$acc loop
do i = 1, N
a(i) = 1
end do
!$acc end kernels
!$acc kernels async (1)
!$acc loop
do i = 1, N
b(i) = 1
end do
!$acc end kernels
!$acc kernels wait (0, 1)
!$acc loop
do i = 1, N
c(i) = a(i) + b(i)
end do
!$acc end kernels
do i = 1, N
if (c(i) .ne. 2.0) call abort
end do
@ -37,4 +62,4 @@ program parallel_wait
deallocate (a)
deallocate (b)
deallocate (c)
end program parallel_wait
end program asyncwait

View File

@ -1,6 +1,6 @@
! { dg-do run }
program parallel_wait
program asyncwait
integer, parameter :: N = 64
real, allocatable :: a(:), b(:), c(:)
integer i
@ -32,6 +32,33 @@ program parallel_wait
end do
!$acc end parallel
do i = 1, N
if (c(i) .ne. 2.0) call abort
end do
!$acc kernels async (0)
!$acc loop
do i = 1, N
a(i) = 1
end do
!$acc end kernels
!$acc kernels async (1)
!$acc loop
do i = 1, N
b(i) = 1
end do
!$acc end kernels
!$acc wait (0, 1)
!$acc kernels
!$acc loop
do i = 1, N
c(i) = a(i) + b(i)
end do
!$acc end kernels
do i = 1, N
if (c(i) .ne. 2.0) call abort
end do
@ -39,4 +66,4 @@ program parallel_wait
deallocate (a)
deallocate (b)
deallocate (c)
end program parallel_wait
end program asyncwait

View File

@ -0,0 +1,290 @@
! { dg-do run }
! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
program main
! Exercises the data clauses of the parallel construct (copyin, copyout,
! copy, create, present and their present_or_* / p* spellings), both with
! arrays that are not mapped beforehand and with arrays mapped through
! acc_copyin.  Every case checks the host values after the region and then
! checks with acc_is_present that nothing is left mapped on the device.
use openacc
implicit none
integer, parameter :: N = 32
real, allocatable :: a(:), b(:), c(:)
integer i
i = 0
allocate (a(N))
allocate (b(N))
allocate (c(N))
! Case 1: plain copyin / copyout.
a(:) = 3.0
b(:) = 0.0
!$acc parallel copyin (a(1:N)) copyout (b(1:N))
do i = 1, N
b(i) = a(i)
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 3.0) call abort
end do
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
! Case 2: same clauses again with different values, so stale data from
! case 1 would be detected.
a(:) = 5.0
b(:) = 1.0
!$acc parallel copyin (a(1:N)) copyout (b(1:N))
do i = 1, N
b(i) = a(i)
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 5.0) call abort
end do
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
! Case 3: present_or_copyin with a already mapped.  a is mapped while it
! holds 6.0 and the host copy is then overwritten with 9.0; the check
! expects the region to have read 6.0.
a(:) = 6.0
b(:) = 0.0
call acc_copyin (a, sizeof (a))
a(:) = 9.0
!$acc parallel present_or_copyin (a(1:N)) copyout (b(1:N))
do i = 1, N
b(i) = a(i)
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 6.0) call abort
end do
call acc_copyout (a, sizeof (a))
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
! Case 4: present_or_copyout with b not mapped beforehand; the host b is
! expected to receive the result.
a(:) = 6.0
b(:) = 0.0
!$acc parallel copyin (a(1:N)) present_or_copyout (b(1:N))
do i = 1, N
b(i) = a(i)
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 6.0) call abort
end do
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
! Case 5: present_or_copyout with b already mapped via acc_copyin.  The
! check runs before acc_copyout and expects the host b to still be 2.0.
a(:) = 5.0
b(:) = 2.0
call acc_copyin (b, sizeof (b))
!$acc parallel copyin (a(1:N)) present_or_copyout (b(1:N))
do i = 1, N
b(i) = a(i)
end do
!$acc end parallel
do i = 1, N
if (a(i) .ne. 5.0) call abort
if (b(i) .ne. 2.0) call abort
end do
call acc_copyout (b, sizeof (b))
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
! Case 6: copy (a is both read and updated) together with copyout.
a(:) = 3.0;
b(:) = 4.0;
!$acc parallel copy (a(1:N)) copyout (b(1:N))
do i = 1, N
a(i) = a(i) + 1
b(i) = a(i) + 2
end do
!$acc end parallel
do i = 1, N
if (a(i) .ne. 4.0) call abort
if (b(i) .ne. 6.0) call abort
end do
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
! Case 7: present_or_copy with neither array mapped beforehand; both
! updates are expected to reach the host.
a(:) = 4.0
b(:) = 7.0
!$acc parallel present_or_copy (a(1:N)) present_or_copy (b(1:N))
do i = 1, N
a(i) = a(i) + 1
b(i) = b(i) + 2
end do
!$acc end parallel
do i = 1, N
if (a(i) .ne. 5.0) call abort
if (b(i) .ne. 9.0) call abort
end do
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
! Case 8: present_or_copy with both arrays already mapped.  The check runs
! before acc_copyout and expects the host values to be unchanged.
a(:) = 3.0
b(:) = 7.0
call acc_copyin (a, sizeof (a))
call acc_copyin (b, sizeof (b))
!$acc parallel present_or_copy (a(1:N)) present_or_copy (b(1:N))
do i = 1, N
a(i) = a(i) + 1
b(i) = b(i) + 2
end do
!$acc end parallel
do i = 1, N
if (a(i) .ne. 3.0) call abort
if (b(i) .ne. 7.0) call abort
end do
call acc_copyout (a, sizeof (a))
call acc_copyout (b, sizeof (b))
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
! Case 9: create for the scratch array c, which is only used inside the
! region to pass a through to b.
a(:) = 3.0
b(:) = 7.0
!$acc parallel copyin (a(1:N)) create (c(1:N)) copyout (b(1:N))
do i = 1, N
c(i) = a(i)
b(i) = c(i)
end do
!$acc end parallel
do i = 1, N
if (a(i) .ne. 3.0) call abort
if (b(i) .ne. 3.0) call abort
end do
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
if (acc_is_present (c) .eqv. .TRUE.) call abort
! Case 10: as case 9, using present_or_create for c.
a(:) = 4.0
b(:) = 8.0
!$acc parallel copyin (a(1:N)) present_or_create (c(1:N)) copyout (b(1:N))
do i = 1, N
c(i) = a(i)
b(i) = c(i)
end do
!$acc end parallel
do i = 1, N
if (a(i) .ne. 4.0) call abort
if (b(i) .ne. 4.0) call abort
end do
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
if (acc_is_present (c) .eqv. .TRUE.) call abort
! Case 11: present for all three arrays, which are mapped with acc_copyin
! before the region and read back with acc_copyout after it.
a(:) = 4.0
call acc_copyin (a, sizeof (a))
call acc_copyin (b, sizeof (b))
call acc_copyin (c, sizeof (c))
!$acc parallel present (a(1:N)) present (c(1:N)) present (b(1:N))
do i = 1, N
c(i) = a(i)
b(i) = c(i)
end do
!$acc end parallel
call acc_copyout (a, sizeof (a))
call acc_copyout (b, sizeof (b))
call acc_copyout (c, sizeof (c))
do i = 1, N
if (a(i) .ne. 4.0) call abort
if (b(i) .ne. 4.0) call abort
end do
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
if (acc_is_present (c) .eqv. .TRUE.) call abort
! Case 12: same scenario as case 3, using the pcopyin spelling.
a(:) = 6.0
b(:) = 0.0
call acc_copyin (a, sizeof (a))
a(:) = 9.0
!$acc parallel pcopyin (a(1:N)) copyout (b(1:N))
do i = 1, N
b(i) = a(i)
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 6.0) call abort
end do
call acc_copyout (a, sizeof (a))
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
! Case 13: same scenario as case 4, using the pcopyout spelling.
a(:) = 6.0
b(:) = 0.0
!$acc parallel copyin (a(1:N)) pcopyout (b(1:N))
do i = 1, N
b(i) = a(i)
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 6.0) call abort
end do
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
! Case 14: same scenario as case 10, using the pcreate spelling.
a(:) = 5.0
b(:) = 7.0
!$acc parallel copyin (a(1:N)) pcreate (c(1:N)) copyout (b(1:N))
do i = 1, N
c(i) = a(i)
b(i) = c(i)
end do
!$acc end parallel
do i = 1, N
if (a(i) .ne. 5.0) call abort
if (b(i) .ne. 5.0) call abort
end do
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
if (acc_is_present (c) .eqv. .TRUE.) call abort
end program main

View File

@ -1,29 +1,22 @@
! { dg-do run { target openacc_nvidia_accel_selected } }
! Tests to exercise the declare directive along with
! the clauses: copy
! copyin
! copyout
! create
! present
! present_or_copy
! present_or_copyin
! present_or_copyout
! present_or_create
module vars
implicit none
integer z
!$acc declare create (z)
end module vars
subroutine subr6 (a, d)
implicit none
integer, parameter :: N = 8
integer :: i
integer :: a(N)
!$acc declare deviceptr (a)
integer :: d(N)
i = 0
!$acc parallel copy (d)
do i = 1, N
d(i) = a(i) + a(i)
end do
!$acc end parallel
end subroutine
subroutine subr5 (a, b, c, d)
implicit none
integer, parameter :: N = 8
@ -201,15 +194,6 @@ subroutine subr0 (a, b, c, d)
if (d(i) .ne. 13) call abort
end do
call subr6 (a, d)
call test (a, .true.)
call test (d, .false.)
do i = 1, N
if (d(i) .ne. 16) call abort
end do
end subroutine
program main
@ -241,8 +225,7 @@ program main
if (a(i) .ne. 8) call abort
if (b(i) .ne. 8) call abort
if (c(i) .ne. 8) call abort
if (d(i) .ne. 16) call abort
if (d(i) .ne. 13) call abort
end do
end program

View File

@ -0,0 +1,54 @@
! { dg-do run }
program main
! Exercises default (none) on parallel and kernels constructs.  Each region
! performs the same three assignments, so a grows by one per region
! (2 -> 3 -> 4 -> 5 -> 6 -> 7), and the value is checked after every region.
implicit none
real a, b
real c
! c is given a device copy through the declare directive, so the last two
! regions can reference it without a data clause.
!$acc declare create (c)
a = 2.0
b = 0.0
! default (none) written after the data clauses.
!$acc parallel copy (a) create (b) default (none)
b = a
a = 1.0
a = a + b
!$acc end parallel
if (a .ne. 3.0) call abort
! Same clauses on a kernels construct.
!$acc kernels copy (a) create (b) default (none)
b = a
a = 1.0
a = a + b
!$acc end kernels
if (a .ne. 4.0) call abort
! default (none) written before the data clauses.
!$acc parallel default (none) copy (a) create (b)
b = a
a = 1.0
a = a + b
!$acc end parallel
if (a .ne. 5.0) call abort
! c appears in no clause on the construct; only the declare directive
! above mentions it.
!$acc parallel default (none) copy (a)
c = a
a = 1.0
a = a + c
!$acc end parallel
if (a .ne. 6.0) call abort
! Neither a nor c appears on the parallel construct; a is mapped by the
! enclosing data construct instead.
!$acc data copy (a)
!$acc parallel default (none)
c = a
a = 1.0
a = a + c
!$acc end parallel
!$acc end data
if (a .ne. 7.0) call abort
end program main

View File

@ -0,0 +1,42 @@
! { dg-do run }
program firstprivate
! Exercises the firstprivate clause on the parallel construct: once with a
! gang loop that modifies the private copy, and once inside a data region
! where the same variable also has a mapped device copy.
integer, parameter :: Nupper=100
integer :: a, b(Nupper), c, d, n
include "openacc_lib.h"
! Use Nupper gangs and iterations on an nvidia device, a single one
! otherwise.
if (acc_get_device_type () .eq. acc_device_nvidia) then
n = Nupper
else
n = 1
end if
b(:) = -1
a = 5
! Each iteration adds its index to a and records the result in b(i).
!$acc parallel firstprivate (a) num_gangs (n)
!$acc loop gang
do i = 1, n
a = a + i
b(i) = a
end do
!$acc end parallel
! The host a is used in the expected value, so the check requires both
! that the host a is unchanged and that every iteration started from it
! (presumably one private copy per gang; confirm against the OpenACC
! specification).
do i = 1, n
if (b(i) .ne. i + a) call abort ()
end do
! Inside the data region a is mapped by copy (a).  The first parallel
! region assigns 10 to its firstprivate a; the second reads a through a
! present clause.
!$acc data copy (a)
!$acc parallel firstprivate (a) copyout (c)
a = 10
c = a
!$acc end parallel
!$acc parallel copyout (d) present (a)
d = a
!$acc end parallel
!$acc end data
! c must see the private value 10, d the mapped value 5.
if (c .ne. 10) call abort ()
if (d .ne. 5) call abort ()
end program firstprivate

View File

@ -0,0 +1,79 @@
! { dg-do run }
program main
! Exercises the static argument of the gang clause on combined
! parallel loop constructs with num_gangs (10).  Every region computes
! a(i) = b(i) + k for a distinct constant k, and the external subroutine
! test verifies the result on the host.
integer, parameter :: n = 100
integer i, a(n), b(n)
integer x
! Reference data: b(i) = i.
do i = 1, n
b(i) = i
end do
! static:* form.
!$acc parallel loop gang (static:*) num_gangs (10)
do i = 1, n
a(i) = b(i) + 0
end do
!$acc end parallel loop
call test (a, b, 0, n)
! Literal static arguments 1, 2, 5 and 20.
!$acc parallel loop gang (static:1) num_gangs (10)
do i = 1, n
a(i) = b(i) + 1
end do
!$acc end parallel loop
call test (a, b, 1, n)
!$acc parallel loop gang (static:2) num_gangs (10)
do i = 1, n
a(i) = b(i) + 2
end do
!$acc end parallel loop
call test (a, b, 2, n)
!$acc parallel loop gang (static:5) num_gangs (10)
do i = 1, n
a(i) = b(i) + 5
end do
!$acc end parallel loop
call test (a, b, 5, n)
!$acc parallel loop gang (static:20) num_gangs (10)
do i = 1, n
a(i) = b(i) + 20
end do
!$acc end parallel loop
call test (a, b, 20, n)
! Static argument given as an expression involving a variable.
x = 5
!$acc parallel loop gang (static:0+x) num_gangs (10)
do i = 1, n
a(i) = b(i) + 5
end do
!$acc end parallel loop
call test (a, b, 5, n)
! Static argument given as a plain variable.
x = 10
!$acc parallel loop gang (static:x) num_gangs (10)
do i = 1, n
a(i) = b(i) + 10
end do
!$acc end parallel loop
call test (a, b, 10, n)
end program main
subroutine test (a, b, sarg, n)
  ! Abort unless a(k) equals b(k) + sarg for every k in 1..n.
  !   a, b : integer arrays of length n to compare
  !   sarg : offset that was added to b to produce a
  !   n    : number of elements
  integer n
  integer sarg
  integer a (n), b(n)
  if (any (a(1:n) .ne. b(1:n) + sarg)) call abort ()
end subroutine test

View File

@ -0,0 +1,886 @@
! { dg-do run }
! { dg-additional-options "-cpp" }
program main
use openacc
implicit none
integer, parameter :: N = 8
integer, parameter :: one = 1
integer, parameter :: zero = 0
integer i, nn
real, allocatable :: a(:), b(:)
real exp, exp2
i = 0
allocate (a(N))
allocate (b(N))
a(:) = 4.0
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
#if ACC_MEM_SHARED
exp = 5.0
#else
exp = 4.0
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 16.0
!$acc parallel if (0 == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 17.0) call abort
end do
a(:) = 8.0
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
#if ACC_MEM_SHARED
exp = 9.0
#else
exp = 8.0
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 22.0
!$acc parallel if (zero == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 23.0) call abort
end do
a(:) = 16.0
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
#if ACC_MEM_SHARED
exp = 17.0;
#else
exp = 16.0;
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 76.0
!$acc parallel if (.FALSE.)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 77.0) call abort
end do
a(:) = 22.0
nn = 1
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (nn == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
#if ACC_MEM_SHARED
exp = 23.0;
#else
exp = 22.0;
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 18.0
nn = 0
!$acc parallel if (nn == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 19.0) call abort
end do
a(:) = 49.0
nn = 1
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
#if ACC_MEM_SHARED
exp = 50.0
#else
exp = 49.0
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 38.0
nn = 0;
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 39.0) call abort
end do
a(:) = 91.0
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 92.0) call abort
end do
a(:) = 43.0
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
#if ACC_MEM_SHARED
exp = 44.0
#else
exp = 43.0
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 87.0
!$acc parallel if (one == 0)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end parallel
do i = 1, N
if (b(i) .ne. 88.0) call abort
end do
a(:) = 3.0
b(:) = 9.0
#if ACC_MEM_SHARED
exp = 0.0
exp2 = 0.0
#else
call acc_copyin (a, sizeof (a))
call acc_copyin (b, sizeof (b))
exp = 3.0;
exp2 = 9.0;
#endif
!$acc update device (a(1:N), b(1:N)) if (1 == 1)
a(:) = 0.0
b(:) = 0.0
!$acc update host (a(1:N), b(1:N)) if (1 == 1)
do i = 1, N
if (a(i) .ne. exp) call abort
if (b(i) .ne. exp2) call abort
end do
a(:) = 6.0
b(:) = 12.0
!$acc update device (a(1:N), b(1:N)) if (0 == 1)
a(:) = 0.0
b(:) = 0.0
!$acc update host (a(1:N), b(1:N)) if (1 == 1)
do i = 1, N
if (a(i) .ne. exp) call abort
if (b(i) .ne. exp2) call abort
end do
a(:) = 26.0
b(:) = 21.0
!$acc update device (a(1:N), b(1:N)) if (1 == 1)
a(:) = 0.0
b(:) = 0.0
!$acc update host (a(1:N), b(1:N)) if (0 == 1)
do i = 1, N
if (a(i) .ne. 0.0) call abort
if (b(i) .ne. 0.0) call abort
end do
#if !ACC_MEM_SHARED
call acc_copyout (a, sizeof (a))
call acc_copyout (b, sizeof (b))
#endif
a(:) = 4.0
b(:) = 0.0
!$acc data copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
!$acc parallel present (a(1:N))
do i = 1, N
b(i) = a(i)
end do
!$acc end parallel
!$acc end data
do i = 1, N
if (b(i) .ne. 4.0) call abort
end do
a(:) = 8.0
b(:) = 1.0
!$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc end data
! Nested data regions with mixed if clauses.  The outer region maps a
! (condition true).  The middle region has a false condition, so b must
! not be mapped by it.  The inner region has a true condition and maps b
! for the parallel construct, which copies a into b.
a(:) = 18.0
b(:) = 21.0
!$acc data copyin (a(1:N)) if (1 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (a) .eqv. .FALSE.) call abort
#endif
!$acc data copyout (b(1:N)) if (0 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc data copyout (b(1:N)) if (1 == 1)
!$acc parallel present (a(1:N)) present (b(1:N))
do i = 1, N
b(i) = a(i)
end do
!$acc end parallel
!$acc end data
! Leaving the inner region must unmap b again.
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc end data
!$acc end data
! Verify every element of b, not only the first one.
do i = 1, N
if (b(i) .ne. 18.0) call abort
end do
!$acc enter data copyin (b(1:N)) if (0 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (0 == 1)
!$acc enter data copyin (b(1:N)) if (1 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .FALSE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (1 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc enter data copyin (b(1:N)) if (zero == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (zero == 1)
!$acc enter data copyin (b(1:N)) if (one == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .FALSE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (one == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc enter data copyin (b(1:N)) if (one == 0)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (one == 0)
!$acc enter data copyin (b(1:N)) if (one == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .FALSE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (one == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
a(:) = 4.0
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
#if ACC_MEM_SHARED
exp = 5.0
#else
exp = 4.0
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 16.0
!$acc kernels if (0 == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
do i = 1, N
if (b(i) .ne. 17.0) call abort
end do
a(:) = 8.0
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
#if ACC_MEM_SHARED
exp = 9.0
#else
exp = 8.0
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 22.0
!$acc kernels if (zero == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
do i = 1, N
if (b(i) .ne. 23.0) call abort
end do
a(:) = 16.0
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
#if ACC_MEM_SHARED
exp = 17.0;
#else
exp = 16.0;
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 76.0
!$acc kernels if (.FALSE.)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
do i = 1, N
if (b(i) .ne. 77.0) call abort
end do
a(:) = 22.0
nn = 1
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (nn == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
#if ACC_MEM_SHARED
exp = 23.0;
#else
exp = 22.0;
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 18.0
nn = 0
!$acc kernels if (nn == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
do i = 1, N
if (b(i) .ne. 19.0) call abort
end do
a(:) = 49.0
nn = 1
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
#if ACC_MEM_SHARED
exp = 50.0
#else
exp = 49.0
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 38.0
nn = 0;
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
do i = 1, N
if (b(i) .ne. 39.0) call abort
end do
a(:) = 91.0
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
do i = 1, N
if (b(i) .ne. 92.0) call abort
end do
a(:) = 43.0
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
#if ACC_MEM_SHARED
exp = 44.0
#else
exp = 43.0
#endif
do i = 1, N
if (b(i) .ne. exp) call abort
end do
a(:) = 87.0
!$acc kernels if (one == 0)
do i = 1, N
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
b(i) = a(i) + 1
else
b(i) = a(i)
end if
end do
!$acc end kernels
do i = 1, N
if (b(i) .ne. 88.0) call abort
end do
a(:) = 3.0
b(:) = 9.0
#if ACC_MEM_SHARED
exp = 0.0
exp2 = 0.0
#else
call acc_copyin (a, sizeof (a))
call acc_copyin (b, sizeof (b))
exp = 3.0;
exp2 = 9.0;
#endif
!$acc update device (a(1:N), b(1:N)) if (1 == 1)
a(:) = 0.0
b(:) = 0.0
!$acc update host (a(1:N), b(1:N)) if (1 == 1)
do i = 1, N
if (a(i) .ne. exp) call abort
if (b(i) .ne. exp2) call abort
end do
a(:) = 6.0
b(:) = 12.0
!$acc update device (a(1:N), b(1:N)) if (0 == 1)
a(:) = 0.0
b(:) = 0.0
!$acc update host (a(1:N), b(1:N)) if (1 == 1)
do i = 1, N
if (a(i) .ne. exp) call abort
if (b(i) .ne. exp2) call abort
end do
a(:) = 26.0
b(:) = 21.0
!$acc update device (a(1:N), b(1:N)) if (1 == 1)
a(:) = 0.0
b(:) = 0.0
!$acc update host (a(1:N), b(1:N)) if (0 == 1)
do i = 1, N
if (a(i) .ne. 0.0) call abort
if (b(i) .ne. 0.0) call abort
end do
#if !ACC_MEM_SHARED
call acc_copyout (a, sizeof (a))
call acc_copyout (b, sizeof (b))
#endif
a(:) = 4.0
b(:) = 0.0
!$acc data copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
!$acc kernels present (a(1:N))
do i = 1, N
b(i) = a(i)
end do
!$acc end kernels
!$acc end data
do i = 1, N
if (b(i) .ne. 4.0) call abort
end do
a(:) = 8.0
b(:) = 1.0
!$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (a) .eqv. .TRUE.) call abort
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc end data
! Nested data regions with mixed if clauses, kernels variant.  The outer
! region maps a (condition true).  The middle region has a false
! condition, so b must not be mapped by it.  The inner region has a true
! condition and maps b for the kernels construct, which copies a into b.
a(:) = 18.0
b(:) = 21.0
!$acc data copyin (a(1:N)) if (1 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (a) .eqv. .FALSE.) call abort
#endif
!$acc data copyout (b(1:N)) if (0 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc data copyout (b(1:N)) if (1 == 1)
!$acc kernels present (a(1:N)) present (b(1:N))
do i = 1, N
b(i) = a(i)
end do
!$acc end kernels
!$acc end data
! Leaving the inner region must unmap b again.
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc end data
!$acc end data
! Verify every element of b, not only the first one.
do i = 1, N
if (b(i) .ne. 18.0) call abort
end do
!$acc enter data copyin (b(1:N)) if (0 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (0 == 1)
!$acc enter data copyin (b(1:N)) if (1 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .FALSE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (1 == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc enter data copyin (b(1:N)) if (zero == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (zero == 1)
!$acc enter data copyin (b(1:N)) if (one == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .FALSE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (one == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc enter data copyin (b(1:N)) if (one == 0)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (one == 0)
!$acc enter data copyin (b(1:N)) if (one == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .FALSE.) call abort
#endif
!$acc exit data delete (b(1:N)) if (one == 1)
#if !ACC_MEM_SHARED
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
end program main

View File

@ -0,0 +1,42 @@
! This test checks if the runtime can properly handle implicit
! firstprivate variables inside subroutines in modules.
! { dg-do run }
module test_mod
! Holds the subroutine under test, so that the parallel regions live inside
! a module procedure.
contains
subroutine test(x)
! Sets x to 5, reads it inside a parallel loop without listing it in any
! clause, then overwrites x on the device with the value of a firstprivate
! y.  On return the caller is expected to see x == -1.
IMPLICIT NONE
INTEGER :: x, y, j
x = 5
! x is referenced in the region but appears in no clause here.
!$ACC PARALLEL LOOP copyout (y)
DO j=1,10
y=x
ENDDO
!$ACC END PARALLEL LOOP
y = -1;
! y is explicitly firstprivate; x is copied back through copyout.
!$ACC PARALLEL LOOP firstprivate (y) copyout (x)
DO j=1,10
x=y
ENDDO
!$ACC END PARALLEL LOOP
end subroutine test
end module test_mod
program t
! Driver: calls test from test_mod and aborts unless the argument comes
! back as -1.
use test_mod
INTEGER :: x_min
! Initial value differs from both values test assigns (5 and -1).
x_min = 8
CALL test(x_min)
if (x_min .ne. -1) call abort
end program t

View File

@ -0,0 +1,19 @@
program foo
! Combines firstprivate (sum) on the parallel construct with
! reduction (+:sum) clauses on nested gang and vector loops.  The program
! performs no check on sum after the region.
implicit none
integer, parameter :: n = 100
integer, dimension(n,n) :: a
integer :: i, j, sum = 0
a = 1
!$acc parallel copyin(a(1:n,1:n)) firstprivate (sum)
!$acc loop gang reduction(+:sum)
do i=1, n
!$acc loop vector reduction(+:sum)
do j=1, n
sum = sum + a(i, j)
enddo
enddo
!$acc end parallel
end program foo

View File

@ -0,0 +1,544 @@
! Miscellaneous tests for private variables.
! { dg-do run }
! Test of gang-private variables declared on loop directive.
subroutine t1()
! x is private on the gang loop; each iteration stores i * 2 in x and adds
! it to arr(i), so arr(i) goes from i to i * 3.
integer :: x, i, arr(32)
do i = 1, 32
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang private(x)
do i = 1, 32
x = i * 2;
arr(i) = arr(i) + x
end do
!$acc end parallel
! Host-side verification of the expected i * 3.
do i = 1, 32
if (arr(i) .ne. i * 3) call abort
end do
end subroutine t1
! Test of gang-private variables declared on loop directive, with broadcasting
! to partitioned workers.
subroutine t2()
! x is private on the gang loop and is read inside a nested worker loop.
! Row i of the 32 x 32 index space gets i * 2 added to each element.
! arr is declared 0:32*32, one element more than the loops touch.
integer :: x, i, j, arr(0:32*32)
do i = 0, 32*32-1
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang private(x)
do i = 0, 31
x = i * 2;
!$acc loop worker
do j = 0, 31
arr(i * 32 + j) = arr(i * 32 + j) + x
end do
end do
!$acc end parallel
! i / 32 recovers the gang-loop index from the flat index.
do i = 0, 32 * 32 - 1
if (arr(i) .ne. i + (i / 32) * 2) call abort
end do
end subroutine t2
! Test of gang-private variables declared on loop directive, with broadcasting
! to partitioned vectors.
subroutine t3()
! Same computation as the worker variant, but the inner loop that reads
! the gang-private x is a vector loop.
! arr is declared 0:32*32, one element more than the loops touch.
integer :: x, i, j, arr(0:32*32)
do i = 0, 32*32-1
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang private(x)
do i = 0, 31
x = i * 2;
!$acc loop vector
do j = 0, 31
arr(i * 32 + j) = arr(i * 32 + j) + x
end do
end do
!$acc end parallel
! i / 32 recovers the gang-loop index from the flat index.
do i = 0, 32 * 32 - 1
if (arr(i) .ne. i + (i / 32) * 2) call abort
end do
end subroutine t3
! Test of gang-private addressable variable declared on loop directive, with
! broadcasting to partitioned workers.
subroutine t4()
! The gang-private variable is a derived-type value pt.  Four of its
! components are set per gang iteration and summed into every element of
! the row by a nested vector loop.
type vec3
integer x, y, z, attr(13)
end type vec3
integer i, j, arr(0:32*32)
type(vec3) pt
do i = 0, 32*32-1
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang private(pt)
do i = 0, 31
pt%x = i
pt%y = i * 2
pt%z = i * 4
pt%attr(5) = i * 6
!$acc loop vector
do j = 0, 31
arr(i * 32 + j) = arr(i * 32 + j) + pt%x + pt%y + pt%z + pt%attr(5);
end do
end do
!$acc end parallel
! The four components add up to i + 2i + 4i + 6i = 13i per row.
do i = 0, 32 * 32 - 1
if (arr(i) .ne. i + (i / 32) * 13) call abort
end do
end subroutine t4
! Test of vector-private variables declared on loop directive.
subroutine t5()
! x is private on two consecutive vector loops nested in a gang loop and a
! worker loop.  The first vector loop adds ieor(i, j * 3) * k to each
! element, the second adds ior(i, j * 5) * k.
integer :: x, i, j, k, idx, arr(0:32*32*32)
do i = 0, 32*32*32-1
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang
do i = 0, 31
!$acc loop worker
do j = 0, 31
!$acc loop vector private(x)
do k = 0, 31
x = ieor(i, j * 3)
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
end do
!$acc loop vector private(x)
do k = 0, 31
x = ior(i, j * 5)
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
end do
end do
end do
!$acc end parallel
! Recompute both contributions on the host for every (i, j, k).
do i = 0, 32 - 1
do j = 0, 32 -1
do k = 0, 32 - 1
idx = i * 1024 + j * 32 + k
if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
call abort
end if
end do
end do
end do
end subroutine t5
! Test of vector-private variables declared on loop directive. Array type.
subroutine t6()
! The vector-private variable is the two-element array pt; both
! contributions are added inside a single vector loop.
! NOTE(review): x is listed in the private clause below but is neither
! declared in this subroutine (it is implicitly typed) nor referenced in
! the loop body - confirm whether it is intentional.
integer :: i, j, k, idx, arr(0:32*32*32), pt(2)
do i = 0, 32*32*32-1
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang
do i = 0, 31
!$acc loop worker
do j = 0, 31
!$acc loop vector private(x, pt)
do k = 0, 31
pt(1) = ieor(i, j * 3)
pt(2) = ior(i, j * 5)
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
end do
end do
end do
!$acc end parallel
! Recompute both contributions on the host for every (i, j, k).
do i = 0, 32 - 1
do j = 0, 32 -1
do k = 0, 32 - 1
idx = i * 1024 + j * 32 + k
if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
call abort
end if
end do
end do
end do
end subroutine t6
! Test of worker-private variables declared on a loop directive.
subroutine t7()
! x is placed in blank common and listed as private on both the gang loop
! and the nested worker loop; the worker loop assigns and uses it.
! arr is declared 0:32*32, one element more than the loops touch.
integer :: x, i, j, arr(0:32*32)
common x
do i = 0, 32*32-1
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang private(x)
do i = 0, 31
!$acc loop worker private(x)
do j = 0, 31
x = ieor(i, j * 3)
arr(i * 32 + j) = arr(i * 32 + j) + x
end do
end do
!$acc end parallel
! i / 32 and mod(i, 32) recover the gang and worker loop indices.
do i = 0, 32 * 32 - 1
if (arr(i) .ne. i + ieor(i / 32, mod(i, 32) * 3)) call abort
end do
end subroutine t7
! Test of worker-private variables declared on a loop directive, broadcasting
! to vector-partitioned mode.
subroutine t8()
! x is private on the worker loop, assigned once per worker iteration and
! then read by every iteration of the nested vector loop.
integer :: x, i, j, k, idx, arr(0:32*32*32)
do i = 0, 32*32*32-1
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang
do i = 0, 31
!$acc loop worker private(x)
do j = 0, 31
x = ieor(i, j * 3)
!$acc loop vector
do k = 0, 31
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
end do
end do
end do
!$acc end parallel
! Recompute the single contribution on the host for every (i, j, k).
do i = 0, 32 - 1
do j = 0, 32 -1
do k = 0, 32 - 1
idx = i * 1024 + j * 32 + k
if (arr(idx) .ne. idx + ieor(i, j * 3) * k) call abort
end do
end do
end do
end subroutine t8
! Test of worker-private variables declared on a loop directive, broadcasting
! to vector-partitioned mode. Back-to-back worker loops.
subroutine t9()
! Two worker loops follow each other inside one gang iteration.  Each has
! its own private x and its own nested vector loop; the first contributes
! ieor(i, j * 3) * k, the second ior(i, j * 5) * k.
integer :: x, i, j, k, idx, arr(0:32*32*32)
do i = 0, 32*32*32-1
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang
do i = 0, 31
!$acc loop worker private(x)
do j = 0, 31
x = ieor(i, j * 3)
!$acc loop vector
do k = 0, 31
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
end do
end do
!$acc loop worker private(x)
do j = 0, 31
x = ior(i, j * 5)
!$acc loop vector
do k = 0, 31
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
end do
end do
end do
!$acc end parallel
! Recompute both contributions on the host for every (i, j, k).
do i = 0, 32 - 1
do j = 0, 32 -1
do k = 0, 32 - 1
idx = i * 1024 + j * 32 + k
if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
call abort
end if
end do
end do
end do
end subroutine t9
! Test of worker-private variables declared on a loop directive, broadcasting
! to vector-partitioned mode. Successive vector loops.
subroutine t10()
! One worker loop with private x contains two vector loops in sequence.
! x is reassigned between them, so the second vector loop must see the new
! value.
integer :: x, i, j, k, idx, arr(0:32*32*32)
do i = 0, 32*32*32-1
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang
do i = 0, 31
!$acc loop worker private(x)
do j = 0, 31
x = ieor(i, j * 3)
!$acc loop vector
do k = 0, 31
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
end do
x = ior(i, j * 5)
!$acc loop vector
do k = 0, 31
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
end do
end do
end do
!$acc end parallel
! Recompute both contributions on the host for every (i, j, k).
do i = 0, 32 - 1
do j = 0, 32 -1
do k = 0, 32 - 1
idx = i * 1024 + j * 32 + k
if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
call abort
end if
end do
end do
end do
end subroutine t10
! Test of worker-private variables declared on a loop directive, broadcasting
! to vector-partitioned mode. Addressable worker variable.
subroutine t11()
! x has the target attribute and is private on the worker loop together
! with the pointer p.  The second value of x is stored through p rather
! than by assigning to x directly.
integer :: i, j, k, idx, arr(0:32*32*32)
integer, target :: x
integer, pointer :: p
do i = 0, 32*32*32-1
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang
do i = 0, 31
!$acc loop worker private(x, p)
do j = 0, 31
p => x
x = ieor(i, j * 3)
!$acc loop vector
do k = 0, 31
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
end do
! Ordinary assignment through the pointer: updates x, which the second
! vector loop then reads.
p = ior(i, j * 5)
!$acc loop vector
do k = 0, 31
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
end do
end do
end do
!$acc end parallel
! Recompute both contributions on the host for every (i, j, k).
do i = 0, 32 - 1
do j = 0, 32 -1
do k = 0, 32 - 1
idx = i * 1024 + j * 32 + k
if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
call abort
end if
end do
end do
end do
end subroutine t11
! Test of worker-private variables declared on a loop directive, broadcasting
! to vector-partitioned mode. Aggregate worker variable.
subroutine t12()
! The worker-private variable is the derived-type value pt.  Both
! components are set before the vector loops; the first loop reads pt%x,
! the second pt%y.
type vec2
integer x, y
end type vec2
integer :: i, j, k, idx, arr(0:32*32*32)
type(vec2) :: pt
do i = 0, 32*32*32-1
arr(i) = i
end do
!$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
!$acc loop gang
do i = 0, 31
!$acc loop worker private(pt)
do j = 0, 31
pt%x = ieor(i, j * 3)
pt%y = ior(i, j * 5)
!$acc loop vector
do k = 0, 31
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%x * k
end do
!$acc loop vector
do k = 0, 31
arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%y * k
end do
end do
end do
!$acc end parallel
! Recompute both contributions on the host for every (i, j, k).
do i = 0, 32 - 1
do j = 0, 32 -1
do k = 0, 32 - 1
idx = i * 1024 + j * 32 + k
if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
call abort
end if
end do
end do
end do
end subroutine t12
! Test of worker-private variables declared on a loop directive, broadcasting
! to vector-partitioned mode. Array worker variable.
subroutine t13()
  ! Worker-private array variable.
  !
  ! Both elements of the private array pt are assigned in the worker loop;
  ! the first vector loop consumes pt(1) and the second consumes pt(2).
  ! The host then verifies that every element of arr received both
  ! contributions and aborts otherwise.
  implicit none
  integer, parameter :: n = 32
  ! Exactly n**3 elements, indexed from 0 so that the flattened offset
  ! i*n*n + j*n + k can be used directly.  Every element is initialised
  ! below, so nothing undefined is transferred by copy(arr).
  integer :: i, j, k, idx, arr(0:n*n*n-1), pt(2)

  do i = 0, n*n*n - 1
    arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(n) num_workers(8) vector_length(32)
  !$acc loop gang
  do i = 0, n - 1
    !$acc loop worker private(pt)
    do j = 0, n - 1
      pt(1) = ieor(i, j * 3)
      pt(2) = ior(i, j * 5)
      !$acc loop vector
      do k = 0, n - 1
        arr(i * n * n + j * n + k) = arr(i * n * n + j * n + k) + pt(1) * k
      end do
      !$acc loop vector
      do k = 0, n - 1
        arr(i * n * n + j * n + k) = arr(i * n * n + j * n + k) + pt(2) * k
      end do
    end do
  end do
  !$acc end parallel

  ! Host-side check: original value plus the pt(1) * k and pt(2) * k terms.
  do i = 0, n - 1
    do j = 0, n - 1
      do k = 0, n - 1
        idx = i * n * n + j * n + k
        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
          call abort
        end if
      end do
    end do
  end do
end subroutine t13
! Test of gang-private variables declared on the parallel directive.
subroutine t14()
  ! Gang-private scalar declared on the parallel construct.
  !
  ! The first gang loop stores i * 2 into the private x; the second gang
  ! loop adds x to arr(i).  Both loops use gang(static:1) with n gangs and
  ! n iterations; the intent appears to be that iteration i of each loop is
  ! handled by the same gang, so the x read in the second loop is the one
  ! written for the same i in the first.  The host then checks that every
  ! element equals 3 + i * 2 and aborts otherwise.
  use openacc
  implicit none
  integer, parameter :: n = 32
  ! The initializer gives x the save attribute (spelled out here); the
  ! value 5 itself is overwritten before x is ever read.
  integer, save :: x = 5
  integer :: i
  integer :: arr(n)

  arr = 3

  !$acc parallel private(x) copy(arr) num_gangs(n) num_workers(8) vector_length(32)
  !$acc loop gang(static:1)
  do i = 1, n
    x = i * 2
  end do

  !$acc loop gang(static:1)
  do i = 1, n
    ! When executing on the host, x is recomputed for this iteration
    ! instead of relying on the value left by the first loop (presumably
    ! because host execution does not provide one private copy per gang).
    if (acc_on_device(acc_device_host)) x = i * 2
    arr(i) = arr(i) + x
  end do
  !$acc end parallel

  do i = 1, n
    if (arr(i) /= 3 + i * 2) call abort
  end do
end subroutine t14
program main
  ! Driver: runs every private-variable test in order.  Each test
  ! subroutine verifies its own results and aborts on a mismatch, so
  ! reaching the end of the program means all of them passed.
  implicit none

  call t1()
  call t2()
  call t3()
  call t4()
  call t5()
  call t6()
  call t7()
  call t8()
  call t9()
  call t10()
  call t11()
  call t12()
  call t13()
  call t14()
end program main