mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-20 12:41:17 +08:00
Update OpenACC test cases
gcc/testsuite/ * c-c++-common/goacc/combined-directives.c: Clean up dg-* directives. * c-c++-common/goacc/loop-clauses.c: Likewise. * g++.dg/goacc/template.C: Likewise. * gfortran.dg/goacc/combined-directives.f90: Likewise. * gfortran.dg/goacc/loop-1.f95: Likewise. * gfortran.dg/goacc/loop-5.f95: Likewise. * gfortran.dg/goacc/loop-6.f95: Likewise. * gfortran.dg/goacc/loop-tree-1.f90: Likewise. * c-c++-common/goacc-gomp/nesting-1.c: Update. * c-c++-common/goacc-gomp/nesting-fail-1.c: Likewise. * c-c++-common/goacc/clauses-fail.c: Likewise. * c-c++-common/goacc/parallel-1.c: Likewise. * c-c++-common/goacc/reduction-1.c: Likewise. * c-c++-common/goacc/reduction-2.c: Likewise. * c-c++-common/goacc/reduction-3.c: Likewise. * c-c++-common/goacc/reduction-4.c: Likewise. * c-c++-common/goacc/routine-3.c: Likewise. * c-c++-common/goacc/routine-4.c: Likewise. * c-c++-common/goacc/routine-5.c: Likewise. * c-c++-common/goacc/tile.c: Likewise. * g++.dg/goacc/template.C: Likewise. * gfortran.dg/goacc/combined-directives.f90: Likewise. * c-c++-common/goacc/nesting-1.c: Move dg-error test cases into... * c-c++-common/goacc/nesting-fail-1.c: ... this file. Update. * c-c++-common/goacc/kernels-1.c: Update. Incorporate... * c-c++-common/goacc/kernels-empty.c: ... this file, and... * c-c++-common/goacc/kernels-eternal.c: ... this file, and... * c-c++-common/goacc/kernels-noreturn.c: ... this file. * c-c++-common/goacc/host_data-1.c: New file. Incorporate... * c-c++-common/goacc/use_device-1.c: ... this file. * c-c++-common/goacc/host_data-2.c: New file. Incorporate... * c-c++-common/goacc/host_data-5.c: ... this file, and... * c-c++-common/goacc/host_data-6.c: ... this file. * c-c++-common/goacc/loop-2-kernels.c: New file. * c-c++-common/goacc/loop-2-parallel.c: Likewise. * c-c++-common/goacc/loop-3.c: Likewise. * g++.dg/goacc/reference.C: Likewise. * g++.dg/goacc/routine-1.C: Likewise. * g++.dg/goacc/routine-2.C: Likewise. 
libgomp/ * testsuite/libgomp.oacc-c-c++-common/clauses-1.c: Update. * testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/if-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/vector-loop.c: Likewise. * testsuite/libgomp.oacc-fortran/asyncwait-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/asyncwait-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/asyncwait-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/declare-1.f90: Likewise. * testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Likewise. XFAIL. * testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c: Update. Incorporate... * testsuite/libgomp.oacc-c-c++-common/firstprivate-2.c: ... this file. * testsuite/libgomp.oacc-c++/template-reduction.C: New file. * testsuite/libgomp.oacc-c-c++-common/gang-static-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-clauses.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/private-variables.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Likewise. * testsuite/libgomp.oacc-fortran/clauses-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/default-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/firstprivate-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/gang-static-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/if-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/implicit-firstprivate-ref.f90: Likewise. * testsuite/libgomp.oacc-fortran/pr68813.f90: Likewise. * testsuite/libgomp.oacc-fortran/private-variables.f90: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-1.c: Merge this file... * testsuite/libgomp.oacc-c-c++-common/parallel-1.c: ..., and this file into... * testsuite/libgomp.oacc-c-c++-common/data-clauses.h: ... 
this new file. Update. * testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels.c: New file. * testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-2.c: Rename to... * testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c: ... this new file. Update. * testsuite/libgomp.oacc-c-c++-common/parallel-2.c: Rename to... * testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c: ... this new file. Update. * testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: New file. Incorporate... * testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/worker-single-4.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/worker-single-6.c: ... this file. * testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Remove file. Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com> Co-Authored-By: Chung-Lin Tang <cltang@codesourcery.com> Co-Authored-By: James Norris <jnorris@codesourcery.com> Co-Authored-By: Julian Brown <julian@codesourcery.com> Co-Authored-By: Nathan Sidwell <nathan@codesourcery.com> Co-Authored-By: Tom de Vries <tom@codesourcery.com> From-SVN: r234575
This commit is contained in:
parent
ba9c755f25
commit
2620c80db0
@ -1,3 +1,52 @@
|
||||
2016-03-30 Thomas Schwinge <thomas@codesourcery.com>
|
||||
Julian Brown <julian@codesourcery.com>
|
||||
Chung-Lin Tang <cltang@codesourcery.com>
|
||||
Cesar Philippidis <cesar@codesourcery.com>
|
||||
James Norris <jnorris@codesourcery.com>
|
||||
Tom de Vries <tom@codesourcery.com>
|
||||
Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* c-c++-common/goacc/combined-directives.c: Clean up dg-*
|
||||
directives.
|
||||
* c-c++-common/goacc/loop-clauses.c: Likewise.
|
||||
* g++.dg/goacc/template.C: Likewise.
|
||||
* gfortran.dg/goacc/combined-directives.f90: Likewise.
|
||||
* gfortran.dg/goacc/loop-1.f95: Likewise.
|
||||
* gfortran.dg/goacc/loop-5.f95: Likewise.
|
||||
* gfortran.dg/goacc/loop-6.f95: Likewise.
|
||||
* gfortran.dg/goacc/loop-tree-1.f90: Likewise.
|
||||
* c-c++-common/goacc-gomp/nesting-1.c: Update.
|
||||
* c-c++-common/goacc-gomp/nesting-fail-1.c: Likewise.
|
||||
* c-c++-common/goacc/clauses-fail.c: Likewise.
|
||||
* c-c++-common/goacc/parallel-1.c: Likewise.
|
||||
* c-c++-common/goacc/reduction-1.c: Likewise.
|
||||
* c-c++-common/goacc/reduction-2.c: Likewise.
|
||||
* c-c++-common/goacc/reduction-3.c: Likewise.
|
||||
* c-c++-common/goacc/reduction-4.c: Likewise.
|
||||
* c-c++-common/goacc/routine-3.c: Likewise.
|
||||
* c-c++-common/goacc/routine-4.c: Likewise.
|
||||
* c-c++-common/goacc/routine-5.c: Likewise.
|
||||
* c-c++-common/goacc/tile.c: Likewise.
|
||||
* g++.dg/goacc/template.C: Likewise.
|
||||
* gfortran.dg/goacc/combined-directives.f90: Likewise.
|
||||
* c-c++-common/goacc/nesting-1.c: Move dg-error test cases into...
|
||||
* c-c++-common/goacc/nesting-fail-1.c: ... this file. Update.
|
||||
* c-c++-common/goacc/kernels-1.c: Update. Incorporate...
|
||||
* c-c++-common/goacc/kernels-empty.c: ... this file, and...
|
||||
* c-c++-common/goacc/kernels-eternal.c: ... this file, and...
|
||||
* c-c++-common/goacc/kernels-noreturn.c: ... this file.
|
||||
* c-c++-common/goacc/host_data-1.c: New file. Incorporate...
|
||||
* c-c++-common/goacc/use_device-1.c: ... this file.
|
||||
* c-c++-common/goacc/host_data-2.c: New file. Incorporate...
|
||||
* c-c++-common/goacc/host_data-5.c: ... this file, and...
|
||||
* c-c++-common/goacc/host_data-6.c: ... this file.
|
||||
* c-c++-common/goacc/loop-2-kernels.c: New file.
|
||||
* c-c++-common/goacc/loop-2-parallel.c: Likewise.
|
||||
* c-c++-common/goacc/loop-3.c: Likewise.
|
||||
* g++.dg/goacc/reference.C: Likewise.
|
||||
* g++.dg/goacc/routine-1.C: Likewise.
|
||||
* g++.dg/goacc/routine-2.C: Likewise.
|
||||
|
||||
2016-03-30 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR middle-end/70450
|
||||
|
@ -20,12 +20,12 @@ f_acc_kernels (void)
|
||||
}
|
||||
}
|
||||
|
||||
#pragma acc routine vector
|
||||
void
|
||||
f_acc_loop (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
|
@ -1,4 +1,5 @@
|
||||
extern int i;
|
||||
#pragma acc declare create(i)
|
||||
|
||||
void
|
||||
f_omp (void)
|
||||
@ -14,6 +15,9 @@ f_omp (void)
|
||||
#pragma acc update host(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */
|
||||
#pragma acc enter data copyin(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */
|
||||
#pragma acc exit data delete(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */
|
||||
#pragma acc loop /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
|
||||
for (i = 0; i < 2; ++i)
|
||||
;
|
||||
}
|
||||
|
||||
#pragma omp for
|
||||
@ -358,85 +362,77 @@ f_acc_data (void)
|
||||
}
|
||||
}
|
||||
|
||||
#pragma acc routine
|
||||
void
|
||||
f_acc_loop (void)
|
||||
{
|
||||
#pragma acc parallel
|
||||
#pragma acc loop
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
#pragma omp parallel /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
|
||||
#pragma omp parallel /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
|
||||
;
|
||||
}
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
#pragma omp for /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
|
||||
#pragma omp for /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
|
||||
for (i = 0; i < 3; i++)
|
||||
;
|
||||
}
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
#pragma omp sections /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
|
||||
#pragma omp sections /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
|
||||
{
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
#pragma omp single /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
|
||||
#pragma omp single /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
|
||||
;
|
||||
}
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
#pragma omp task /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
|
||||
#pragma omp task /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
|
||||
;
|
||||
}
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
#pragma omp master /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
|
||||
#pragma omp master /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
|
||||
;
|
||||
}
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
#pragma omp critical /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
|
||||
#pragma omp critical /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
|
||||
;
|
||||
}
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
|
||||
#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
|
||||
;
|
||||
}
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
#pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
|
||||
#pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
|
||||
;
|
||||
#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
|
||||
#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
|
||||
;
|
||||
#pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of OpenACC region" } */
|
||||
#pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of OpenACC routine" } */
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
/* Miscellaneous tests where clause parsing is expected to fail. */
|
||||
|
||||
void
|
||||
f (void)
|
||||
{
|
||||
@ -17,3 +19,13 @@ f (void)
|
||||
for (i = 0; i < 2; ++i)
|
||||
;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
f2 (void)
|
||||
{
|
||||
int a, b[100];
|
||||
|
||||
#pragma acc parallel firstprivate (b[10:20]) /* { dg-error "expected ... before ... token" } */
|
||||
;
|
||||
}
|
||||
|
@ -1,10 +1,7 @@
|
||||
// { dg-do compile }
|
||||
// { dg-options "-fopenacc -fdump-tree-gimple" }
|
||||
// { dg-additional-options "-fdump-tree-gimple" }
|
||||
|
||||
// This error is temporary. Remove when support is added for these clauses
|
||||
// in the middle end. Also remove the comments from the reduction test
|
||||
// Remove the comments from the reduction test
|
||||
// after the FE learns that reduction variables may appear in data clauses too.
|
||||
// { dg-prune-output "sorry, unimplemented" }
|
||||
|
||||
void
|
||||
test ()
|
||||
|
@ -1,4 +1,14 @@
|
||||
/* { dg-do compile } */
|
||||
/* Test valid use of host_data directive. */
|
||||
|
||||
int v1[3][3];
|
||||
|
||||
void
|
||||
f (void)
|
||||
{
|
||||
#pragma acc host_data use_device(v1)
|
||||
;
|
||||
}
|
||||
|
||||
|
||||
void bar (float *, float *);
|
||||
|
78
gcc/testsuite/c-c++-common/goacc/host_data-2.c
Normal file
78
gcc/testsuite/c-c++-common/goacc/host_data-2.c
Normal file
@ -0,0 +1,78 @@
|
||||
/* Test invalid use of host_data directive. */
|
||||
|
||||
int v0;
|
||||
#pragma acc host_data use_device(v0) /* { dg-error "expected declaration specifiers before" } */
|
||||
|
||||
|
||||
void
|
||||
f (void)
|
||||
{
|
||||
int v2 = 3;
|
||||
#pragma acc host_data copy(v2) /* { dg-error ".copy. is not valid for ..pragma acc host_data." } */
|
||||
;
|
||||
|
||||
#pragma acc host_data use_device(v2)
|
||||
;
|
||||
/* { dg-error ".use_device_ptr. variable is neither a pointer nor an array" "" { target c } 14 } */
|
||||
/* { dg-error ".use_device_ptr. variable is neither a pointer, nor an arraynor reference to pointer or array" "" { target c++ } 14 } */
|
||||
|
||||
#pragma acc host_data use_device(v0)
|
||||
;
|
||||
/* { dg-error ".use_device_ptr. variable is neither a pointer nor an array" "" { target c } 19 } */
|
||||
/* { dg-error ".use_device_ptr. variable is neither a pointer, nor an arraynor reference to pointer or array" "" { target c++ } 19 } */
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
f2 (void)
|
||||
{
|
||||
int x[100];
|
||||
|
||||
#pragma acc enter data copyin (x)
|
||||
/* Specifying an array index is not valid for host_data/use_device. */
|
||||
#pragma acc host_data use_device (x[4]) /* { dg-error "expected '\\\)' before '\\\[' token" } */
|
||||
;
|
||||
#pragma acc exit data delete (x)
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
f3 (void)
|
||||
{
|
||||
int x[100];
|
||||
|
||||
#pragma acc data copyin (x[25:50])
|
||||
{
|
||||
int *xp;
|
||||
#pragma acc host_data use_device (x)
|
||||
{
|
||||
/* This use of the present clause is undefined behavior for OpenACC. */
|
||||
#pragma acc parallel present (x) copyout (xp) /* { dg-error "variable .x. declared in enclosing .host_data. region" } */
|
||||
{
|
||||
xp = x;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
f4 (void)
|
||||
{
|
||||
int x[50];
|
||||
|
||||
#pragma acc data copyin (x[10:30])
|
||||
{
|
||||
int *xp;
|
||||
#pragma acc host_data use_device (x)
|
||||
{
|
||||
/* Here 'x' being implicitly firstprivate for the parallel region
|
||||
conflicts with it being declared as use_device in the enclosing
|
||||
host_data region. */
|
||||
#pragma acc parallel copyout (xp)
|
||||
{
|
||||
xp = x; /* { dg-error "variable .x. declared in enclosing .host_data. region" } */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,23 +0,0 @@
|
||||
/* { dg-do compile } */
|
||||
|
||||
#define N 1024
|
||||
|
||||
int main (int argc, char* argv[])
|
||||
{
|
||||
int x[N];
|
||||
|
||||
#pragma acc data copyin (x[0:N])
|
||||
{
|
||||
int *xp;
|
||||
#pragma acc host_data use_device (x)
|
||||
{
|
||||
/* This use of the present clause is undefined behavior for OpenACC. */
|
||||
#pragma acc parallel present (x) copyout (xp) /* { dg-error "variable 'x' declared in enclosing 'host_data' region" } */
|
||||
{
|
||||
xp = x;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
/* { dg-do compile } */
|
||||
|
||||
#define N 1024
|
||||
|
||||
int main (int argc, char* argv[])
|
||||
{
|
||||
int x[N];
|
||||
|
||||
#pragma acc data copyin (x[0:N])
|
||||
{
|
||||
int *xp;
|
||||
#pragma acc host_data use_device (x)
|
||||
{
|
||||
/* Here 'x' being implicitly firstprivate for the parallel region
|
||||
conflicts with it being declared as use_device in the enclosing
|
||||
host_data region. */
|
||||
#pragma acc parallel copyout (xp)
|
||||
{
|
||||
xp = x; /* { dg-error "variable 'x' declared in enclosing 'host_data' region" } */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,6 +1,45 @@
|
||||
void
|
||||
foo (void)
|
||||
int
|
||||
kernels_empty (void)
|
||||
{
|
||||
#pragma acc kernels
|
||||
;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
kernels_eternal (void)
|
||||
{
|
||||
#pragma acc kernels
|
||||
{
|
||||
while (1)
|
||||
;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
kernels_noreturn (void)
|
||||
{
|
||||
#pragma acc kernels
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
float b[10][15][10];
|
||||
|
||||
void
|
||||
kernels_loop_ptr_it (void)
|
||||
{
|
||||
float *i;
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
#pragma acc loop
|
||||
for (i = &b[0][0][0]; i < &b[0][0][10]; i++)
|
||||
;
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +0,0 @@
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
#pragma acc kernels
|
||||
;
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
#pragma acc kernels
|
||||
{
|
||||
while (1)
|
||||
;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,12 +0,0 @@
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
189
gcc/testsuite/c-c++-common/goacc/loop-2-kernels.c
Normal file
189
gcc/testsuite/c-c++-common/goacc/loop-2-kernels.c
Normal file
@ -0,0 +1,189 @@
|
||||
void K(void)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
#pragma acc loop auto
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang(5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang(num:5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang(static:5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang(static:*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang
|
||||
for (i = 0; i < 10; i++)
|
||||
{
|
||||
#pragma acc loop vector
|
||||
for (j = 0; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop worker
|
||||
for (j = 0; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop gang // { dg-error "inner loop uses same" }
|
||||
for (j = 0; j < 10; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc loop seq gang // { dg-error "'seq' overrides" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc loop worker
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop worker(5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop worker(num:5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop worker
|
||||
for (i = 0; i < 10; i++)
|
||||
{
|
||||
#pragma acc loop vector
|
||||
for (j = 0; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop worker // { dg-error "inner loop uses same" }
|
||||
for (j = 0; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 10; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc loop seq worker // { dg-error "'seq' overrides" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang worker
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc loop vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop vector(5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop vector(length:5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{
|
||||
#pragma acc loop vector // { dg-error "inner loop uses same" }
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop worker
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop gang
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc loop seq vector // { dg-error "'seq' overrides" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop worker vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc loop auto
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop seq auto // { dg-error "'seq' overrides" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang auto // { dg-error "'auto' conflicts" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop worker auto // { dg-error "'auto' conflicts" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop vector auto // { dg-error "'auto' conflicts" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
}
|
||||
|
||||
#pragma acc kernels loop auto
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop gang
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop gang(5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop gang(num:5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop gang(static:5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop gang(static:*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc kernels loop worker
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop worker(5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop worker(num:5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop seq worker // { dg-error "'seq' overrides" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc kernels loop gang worker
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc kernels loop vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop vector(5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop vector(length:5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop seq vector // { dg-error "'seq' overrides" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc kernels loop gang vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop worker vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc kernels loop auto
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop seq auto // { dg-error "'seq' overrides" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc kernels loop gang auto // { dg-error "'auto' conflicts" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc kernels loop worker auto // { dg-error "'auto' conflicts" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc kernels loop vector auto // { dg-error "'auto' conflicts" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
|
||||
{ }
|
||||
}
|
162
gcc/testsuite/c-c++-common/goacc/loop-2-parallel.c
Normal file
162
gcc/testsuite/c-c++-common/goacc/loop-2-parallel.c
Normal file
@ -0,0 +1,162 @@
|
||||
void P(void)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
#pragma acc parallel
|
||||
{
|
||||
#pragma acc loop auto
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang(static:5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang(static:*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang // { dg-message "containing loop" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{
|
||||
#pragma acc loop vector
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop worker
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop gang // { dg-error "inner loop uses same" }
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc loop seq gang // { dg-error "'seq' overrides" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc loop worker
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop worker // { dg-message "containing loop" 2 }
|
||||
for (i = 0; i < 10; i++)
|
||||
{
|
||||
#pragma acc loop vector
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop worker // { dg-error "inner loop uses same" }
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop gang // { dg-error "incorrectly nested" }
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc loop seq worker // { dg-error "'seq' overrides" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang worker
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc loop vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop vector // { dg-message "containing loop" 3 }
|
||||
for (i = 0; i < 10; i++)
|
||||
{
|
||||
#pragma acc loop vector // { dg-error "inner loop uses same" }
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop worker // { dg-error "incorrectly nested" }
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
#pragma acc loop gang // { dg-error "incorrectly nested" }
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc loop seq vector // { dg-error "'seq' overrides" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop worker vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc loop auto
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop seq auto // { dg-error "'seq' overrides" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang auto // { dg-error "'auto' conflicts" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop worker auto // { dg-error "'auto' conflicts" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop vector auto // { dg-error "'auto' conflicts" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
}
|
||||
|
||||
#pragma acc parallel loop auto
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop gang
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop gang(static:5)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop gang(static:*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc parallel loop seq gang // { dg-error "'seq' overrides" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
|
||||
{ }
|
||||
|
||||
#pragma acc parallel loop worker
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc parallel loop seq worker // { dg-error "'seq' overrides" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc parallel loop gang worker
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc parallel loop vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc parallel loop seq vector // { dg-error "'seq' overrides" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc parallel loop gang vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop worker vector
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc parallel loop auto
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop seq auto // { dg-error "'seq' overrides" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'seq' overrides" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc parallel loop gang auto // { dg-error "'auto' conflicts" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc parallel loop worker auto // { dg-error "'auto' conflicts" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc parallel loop vector auto // { dg-error "'auto' conflicts" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "'auto' conflicts" "" { target c++ } }
|
||||
{ }
|
||||
}
|
58
gcc/testsuite/c-c++-common/goacc/loop-3.c
Normal file
58
gcc/testsuite/c-c++-common/goacc/loop-3.c
Normal file
@ -0,0 +1,58 @@
|
||||
void par1 (void)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
#pragma acc parallel
|
||||
{
|
||||
#pragma acc loop gang(5) // { dg-error "argument not permitted" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc loop gang(num:5) // { dg-error "argument not permitted" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc loop worker(5) // { dg-error "argument not permitted" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc loop worker(num:5) // { dg-error "argument not permitted" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc loop vector(5) // { dg-error "argument not permitted" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
#pragma acc loop vector(length:5) // { dg-error "argument not permitted" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* Same negative cases as the nested-loop form, but written with the
   combined `parallel loop' directive.  Per the target selectors, the
   "argument not permitted" error is expected on the pragma line when
   compiling as C and on the following `for' line when compiling as C++.
   `j' is declared but not used in this function.  */
void p2 (void)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
#pragma acc parallel loop gang(5) // { dg-error "argument not permitted" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc parallel loop gang(num:5) // { dg-error "argument not permitted" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
|
||||
{ }
|
||||
|
||||
#pragma acc parallel loop worker(5) // { dg-error "argument not permitted" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc parallel loop worker(num:5) // { dg-error "argument not permitted" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
|
||||
{ }
|
||||
|
||||
#pragma acc parallel loop vector(5) // { dg-error "argument not permitted" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
|
||||
{ }
|
||||
#pragma acc parallel loop vector(length:5) // { dg-error "argument not permitted" "" { target c } }
|
||||
for (i = 0; i < 10; i++) // { dg-error "argument not permitted" "" { target c++ } }
|
||||
{ }
|
||||
}
|
@ -1,7 +1,3 @@
|
||||
/* { dg-do compile } */
|
||||
|
||||
/* { dg-prune-output "sorry, unimplemented" } */
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
|
@ -58,10 +58,6 @@ f_acc_data (void)
|
||||
|
||||
#pragma acc exit data delete(i)
|
||||
|
||||
#pragma acc loop /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
|
||||
for (i = 0; i < 2; ++i)
|
||||
;
|
||||
|
||||
#pragma acc data
|
||||
{
|
||||
#pragma acc parallel
|
||||
@ -92,10 +88,6 @@ f_acc_data (void)
|
||||
#pragma acc enter data copyin(i)
|
||||
|
||||
#pragma acc exit data delete(i)
|
||||
|
||||
#pragma acc loop /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
|
||||
for (i = 0; i < 2; ++i)
|
||||
;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -38,6 +38,25 @@ f_acc_kernels (void)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
f_acc_data (void)
|
||||
{
|
||||
unsigned int i;
|
||||
#pragma acc data
|
||||
{
|
||||
#pragma acc loop /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
|
||||
for (i = 0; i < 2; ++i)
|
||||
;
|
||||
|
||||
#pragma acc data
|
||||
{
|
||||
#pragma acc loop /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
|
||||
for (i = 0; i < 2; ++i)
|
||||
;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#pragma acc routine
|
||||
void
|
||||
f_acc_routine (void)
|
||||
@ -45,3 +64,13 @@ f_acc_routine (void)
|
||||
#pragma acc parallel /* { dg-error "OpenACC region inside of OpenACC routine, nested parallelism not supported yet" } */
|
||||
;
|
||||
}
|
||||
|
||||
void
|
||||
f (void)
|
||||
{
|
||||
int i, v = 0;
|
||||
|
||||
#pragma acc loop gang reduction (+:v) /* { dg-error "loop directive must be associated with an OpenACC compute region" } */
|
||||
for (i = 0; i < 10; i++)
|
||||
v++;
|
||||
}
|
||||
|
@ -1,6 +1,38 @@
|
||||
void
|
||||
foo (void)
|
||||
int
|
||||
parallel_empty (void)
|
||||
{
|
||||
#pragma acc parallel
|
||||
;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
parallel_eternal (void)
|
||||
{
|
||||
#pragma acc parallel
|
||||
{
|
||||
while (1)
|
||||
;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
parallel_noreturn (void)
|
||||
{
|
||||
#pragma acc parallel
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
parallel_clauses (void)
|
||||
{
|
||||
int a, b[100];
|
||||
|
||||
#pragma acc parallel firstprivate (a, b)
|
||||
;
|
||||
}
|
||||
|
@ -1,70 +1,65 @@
|
||||
/* { dg-require-effective-target alloca } */
|
||||
/* Integer reductions. */
|
||||
|
||||
#define vl 32
|
||||
#define n 1000
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
const int n = 1000;
|
||||
int i;
|
||||
int result, array[n];
|
||||
int lresult;
|
||||
|
||||
/* '+' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (+:result)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (+:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result += array[i];
|
||||
|
||||
/* '*' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (*:result)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (*:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result *= array[i];
|
||||
|
||||
// result = 0;
|
||||
// vresult = 0;
|
||||
//
|
||||
// /* 'max' reductions. */
|
||||
// #pragma acc parallel vector_length (vl)
|
||||
// #pragma acc loop reduction (+:result)
|
||||
// for (i = 0; i < n; i++)
|
||||
// result = result > array[i] ? result : array[i];
|
||||
//
|
||||
// /* 'min' reductions. */
|
||||
// #pragma acc parallel vector_length (vl)
|
||||
// #pragma acc loop reduction (+:result)
|
||||
// for (i = 0; i < n; i++)
|
||||
// result = result < array[i] ? result : array[i];
|
||||
/* 'max' reductions. */
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (max:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result = result > array[i] ? result : array[i];
|
||||
|
||||
/* 'min' reductions. */
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (min:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result = result < array[i] ? result : array[i];
|
||||
|
||||
/* '&' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (&:result)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (&:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result &= array[i];
|
||||
|
||||
/* '|' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (|:result)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (|:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result |= array[i];
|
||||
|
||||
/* '^' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (^:result)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (^:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result ^= array[i];
|
||||
|
||||
/* '&&' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (&&:lresult)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (&&:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult && (result > array[i]);
|
||||
|
||||
/* '||' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (||:lresult)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (||:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult || (result > array[i]);
|
||||
|
||||
|
@ -1,49 +1,47 @@
|
||||
/* { dg-require-effective-target alloca } */
|
||||
/* float reductions. */
|
||||
|
||||
#define vl 32
|
||||
#define n 1000
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
const int n = 1000;
|
||||
int i;
|
||||
float result, array[n];
|
||||
int lresult;
|
||||
|
||||
/* '+' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (+:result)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (+:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result += array[i];
|
||||
|
||||
/* '*' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (*:result)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (*:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result *= array[i];
|
||||
|
||||
// /* 'max' reductions. */
|
||||
// #pragma acc parallel vector_length (vl)
|
||||
// #pragma acc loop reduction (+:result)
|
||||
// for (i = 0; i < n; i++)
|
||||
// result = result > array[i] ? result : array[i];
|
||||
//
|
||||
// /* 'min' reductions. */
|
||||
// #pragma acc parallel vector_length (vl)
|
||||
// #pragma acc loop reduction (+:result)
|
||||
// for (i = 0; i < n; i++)
|
||||
// result = result < array[i] ? result : array[i];
|
||||
/* 'max' reductions. */
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (max:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result = result > array[i] ? result : array[i];
|
||||
|
||||
/* 'min' reductions. */
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (min:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result = result < array[i] ? result : array[i];
|
||||
|
||||
/* '&&' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (&&:lresult)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (&&:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult && (result > array[i]);
|
||||
|
||||
/* '||' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (||:lresult)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (||:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult || (result > array[i]);
|
||||
|
||||
|
@ -1,49 +1,47 @@
|
||||
/* { dg-require-effective-target alloca } */
|
||||
/* double reductions. */
|
||||
|
||||
#define vl 32
|
||||
#define n 1000
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
const int n = 1000;
|
||||
int i;
|
||||
double result, array[n];
|
||||
int lresult;
|
||||
|
||||
/* '+' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (+:result)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (+:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result += array[i];
|
||||
|
||||
/* '*' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (*:result)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (*:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result *= array[i];
|
||||
|
||||
// /* 'max' reductions. */
|
||||
// #pragma acc parallel vector_length (vl)
|
||||
// #pragma acc loop reduction (+:result)
|
||||
// for (i = 0; i < n; i++)
|
||||
// result = result > array[i] ? result : array[i];
|
||||
//
|
||||
// /* 'min' reductions. */
|
||||
// #pragma acc parallel vector_length (vl)
|
||||
// #pragma acc loop reduction (+:result)
|
||||
// for (i = 0; i < n; i++)
|
||||
// result = result < array[i] ? result : array[i];
|
||||
/* 'max' reductions. */
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (max:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result = result > array[i] ? result : array[i];
|
||||
|
||||
/* 'min' reductions. */
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (min:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result = result < array[i] ? result : array[i];
|
||||
|
||||
/* '&&' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (&&:lresult)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (&&:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult && (result > array[i]);
|
||||
|
||||
/* '||' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (||:lresult)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (||:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult || (result > array[i]);
|
||||
|
||||
|
@ -1,51 +1,35 @@
|
||||
/* { dg-require-effective-target alloca } */
|
||||
/* complex reductions. */
|
||||
|
||||
#define vl 32
|
||||
#define n 1000
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
const int n = 1000;
|
||||
int i;
|
||||
__complex__ double result, array[n];
|
||||
int lresult;
|
||||
|
||||
/* '+' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (+:result)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (+:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result += array[i];
|
||||
|
||||
/* Needs support for complex multiplication. */
|
||||
|
||||
// /* '*' reductions. */
|
||||
// #pragma acc parallel vector_length (vl)
|
||||
// #pragma acc loop reduction (*:result)
|
||||
// for (i = 0; i < n; i++)
|
||||
// result *= array[i];
|
||||
//
|
||||
// /* 'max' reductions. */
|
||||
// #pragma acc parallel vector_length (vl)
|
||||
// #pragma acc loop reduction (+:result)
|
||||
// for (i = 0; i < n; i++)
|
||||
// result = result > array[i] ? result : array[i];
|
||||
//
|
||||
// /* 'min' reductions. */
|
||||
// #pragma acc parallel vector_length (vl)
|
||||
// #pragma acc loop reduction (+:result)
|
||||
// for (i = 0; i < n; i++)
|
||||
// result = result < array[i] ? result : array[i];
|
||||
/* '*' reductions. */
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (*:result)
|
||||
for (i = 0; i < n; i++)
|
||||
result *= array[i];
|
||||
|
||||
/* '&&' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (&&:lresult)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (&&:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult && (__real__(result) > __real__(array[i]));
|
||||
|
||||
/* '||' reductions. */
|
||||
#pragma acc parallel vector_length (vl)
|
||||
#pragma acc loop reduction (||:lresult)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop gang worker vector reduction (||:lresult)
|
||||
for (i = 0; i < n; i++)
|
||||
lresult = lresult || (__real__(result) > __real__(array[i]));
|
||||
|
||||
|
@ -1,52 +1,118 @@
|
||||
/* Test invalid calls to routines. */
|
||||
|
||||
#pragma acc routine gang
|
||||
void gang (void) /* { dg-message "declared here" 3 } */
|
||||
int
|
||||
gang () /* { dg-message "declared here" 3 } */
|
||||
{
|
||||
#pragma acc loop gang worker vector
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#pragma acc routine worker
|
||||
void worker (void) /* { dg-message "declared here" 2 } */
|
||||
int
|
||||
worker () /* { dg-message "declared here" 2 } */
|
||||
{
|
||||
#pragma acc loop worker vector
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#pragma acc routine vector
|
||||
void vector (void) /* { dg-message "declared here" 1 } */
|
||||
int
|
||||
vector () /* { dg-message "declared here" } */
|
||||
{
|
||||
#pragma acc loop vector
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#pragma acc routine seq
|
||||
void seq (void)
|
||||
int
|
||||
seq ()
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main ()
|
||||
int
|
||||
main ()
|
||||
{
|
||||
|
||||
#pragma acc parallel num_gangs (32) num_workers (32) vector_length (32)
|
||||
int red = 0;
|
||||
#pragma acc parallel copy (red)
|
||||
{
|
||||
#pragma acc loop gang /* { dg-message "loop here" 1 } */
|
||||
/* Independent/seq loop tests. */
|
||||
#pragma acc loop reduction (+:red) // { dg-warning "insufficient partitioning" }
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
gang (); /* { dg-error "routine call uses same" } */
|
||||
worker ();
|
||||
vector ();
|
||||
seq ();
|
||||
}
|
||||
#pragma acc loop worker /* { dg-message "loop here" 2 } */
|
||||
red += gang ();
|
||||
|
||||
#pragma acc loop reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
gang (); /* { dg-error "routine call uses same" } */
|
||||
worker (); /* { dg-error "routine call uses same" } */
|
||||
vector ();
|
||||
seq ();
|
||||
}
|
||||
#pragma acc loop vector /* { dg-message "loop here" 3 } */
|
||||
red += worker ();
|
||||
|
||||
#pragma acc loop reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
gang (); /* { dg-error "routine call uses same" } */
|
||||
worker (); /* { dg-error "routine call uses same" } */
|
||||
vector (); /* { dg-error "routine call uses same" } */
|
||||
seq ();
|
||||
}
|
||||
red += vector ();
|
||||
|
||||
/* Gang routine tests. */
|
||||
#pragma acc loop gang reduction (+:red) /* { dg-message "containing loop" } */
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += gang (); // { dg-error "routine call uses same" }
|
||||
|
||||
#pragma acc loop worker reduction (+:red) /* { dg-message "containing loop" } */
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += gang (); // { dg-error "routine call uses same" }
|
||||
|
||||
#pragma acc loop vector reduction (+:red) /* { dg-message "containing loop" } */
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += gang (); // { dg-error "routine call uses same" }
|
||||
|
||||
/* Worker routine tests. */
|
||||
#pragma acc loop gang reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += worker ();
|
||||
|
||||
#pragma acc loop worker reduction (+:red) /* { dg-message "containing loop" } */
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += worker (); // { dg-error "routine call uses same" }
|
||||
|
||||
#pragma acc loop vector reduction (+:red) /* { dg-message "containing loop" } */
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += worker (); // { dg-error "routine call uses same" }
|
||||
|
||||
/* Vector routine tests. */
|
||||
#pragma acc loop gang reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += vector ();
|
||||
|
||||
#pragma acc loop worker reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += vector ();
|
||||
|
||||
#pragma acc loop vector reduction (+:red) /* { dg-message "containing loop" } */
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += vector (); // { dg-error "routine call uses same" }
|
||||
|
||||
/* Seq routine tests. */
|
||||
#pragma acc loop gang reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += seq ();
|
||||
|
||||
#pragma acc loop worker reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += seq ();
|
||||
|
||||
#pragma acc loop vector reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red += seq ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* Test invalid intra-routine parallelism. */
|
||||
|
||||
void gang (void);
|
||||
void worker (void);
|
||||
@ -14,6 +15,24 @@ void seq (void)
|
||||
worker (); /* { dg-error "routine call uses" } */
|
||||
vector (); /* { dg-error "routine call uses" } */
|
||||
seq ();
|
||||
|
||||
int red;
|
||||
|
||||
#pragma acc loop reduction (+:red) // { dg-warning "insufficient partitioning" }
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop gang reduction (+:red) // { dg-error "disallowed by containing routine" }
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop worker reduction (+:red) // { dg-error "disallowed by containing routine" }
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop vector reduction (+:red) // { dg-error "disallowed by containing routine" }
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
}
|
||||
|
||||
void vector (void) /* { dg-message "declared here" 1 } */
|
||||
@ -22,6 +41,24 @@ void vector (void) /* { dg-message "declared here" 1 } */
|
||||
worker (); /* { dg-error "routine call uses" } */
|
||||
vector ();
|
||||
seq ();
|
||||
|
||||
int red;
|
||||
|
||||
#pragma acc loop reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop gang reduction (+:red) // { dg-error "disallowed by containing routine" }
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop worker reduction (+:red) // { dg-error "disallowed by containing routine" }
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop vector reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
}
|
||||
|
||||
void worker (void) /* { dg-message "declared here" 2 } */
|
||||
@ -30,6 +67,24 @@ void worker (void) /* { dg-message "declared here" 2 } */
|
||||
worker ();
|
||||
vector ();
|
||||
seq ();
|
||||
|
||||
int red;
|
||||
|
||||
#pragma acc loop reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop gang reduction (+:red) // { dg-error "disallowed by containing routine" }
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop worker reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop vector reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
}
|
||||
|
||||
void gang (void) /* { dg-message "declared here" 3 } */
|
||||
@ -38,4 +93,22 @@ void gang (void) /* { dg-message "declared here" 3 } */
|
||||
worker ();
|
||||
vector ();
|
||||
seq ();
|
||||
|
||||
int red;
|
||||
|
||||
#pragma acc loop reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop gang reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop worker reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
|
||||
#pragma acc loop vector reduction (+:red)
|
||||
for (int i = 0; i < 10; i++)
|
||||
red ++;
|
||||
}
|
||||
|
@ -46,6 +46,21 @@ using namespace g;
|
||||
|
||||
#pragma acc routine (c) /* { dg-error "does not refer to" } */
|
||||
|
||||
|
||||
void Bar ();
|
||||
|
||||
void Foo ()
|
||||
{
|
||||
Bar ();
|
||||
}
|
||||
|
||||
#pragma acc routine (Bar) // { dg-error "must be applied before use" }
|
||||
|
||||
#pragma acc routine (Foo) gang // { dg-error "must be applied before definition" }
|
||||
|
||||
#pragma acc routine (Baz) // { dg-error "not been declared" }
|
||||
|
||||
|
||||
int vb1; /* { dg-error "directive for use" } */
|
||||
extern int vb2; /* { dg-error "directive for use" } */
|
||||
static int vb3; /* { dg-error "directive for use" } */
|
||||
|
@ -1,5 +1,3 @@
|
||||
/* { dg-do compile } */
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
@ -71,3 +69,259 @@ main ()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void par (void)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
#pragma acc parallel
|
||||
{
|
||||
#pragma acc loop tile // { dg-error "expected" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile() // { dg-error "expected" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(1)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(2)
|
||||
for (i = 0; i < 10; i++)
|
||||
{
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc loop tile(-2) // { dg-warning "'tile' value must be positive" }
|
||||
for (i = 1; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(i)
|
||||
for (i = 1; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(2, 2, 1)
|
||||
for (i = 1; i < 3; i++)
|
||||
{
|
||||
for (j = 4; j < 6; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc loop tile(2, 2)
|
||||
for (i = 1; i < 5; i+=2)
|
||||
{
|
||||
for (j = i + 1; j < 7; j+=i)
|
||||
{ }
|
||||
}
|
||||
#pragma acc loop vector tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop vector gang tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop vector worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
}
|
||||
}
|
||||
void p3 (void)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
|
||||
#pragma acc parallel loop tile // { dg-error "expected" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop tile() // { dg-error "expected" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop tile(1)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop tile(*, 1)
|
||||
for (i = 0; i < 10; i++)
|
||||
{
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc parallel loop tile(-2) // { dg-warning "'tile' value must be positive" }
|
||||
for (i = 1; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop tile(i)
|
||||
for (i = 1; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop tile(2, 2, 1)
|
||||
for (i = 1; i < 3; i++)
|
||||
{
|
||||
for (j = 4; j < 6; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc parallel loop tile(2, 2)
|
||||
for (i = 1; i < 5; i+=2)
|
||||
{
|
||||
for (j = i + 1; j < 7; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc parallel loop vector tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop gang tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop vector gang tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop vector worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc parallel loop gang worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Exercise the `tile' clause on loop directives nested inside a kernels
   region: a missing or empty argument list (dg-error "expected"),
   constant, expression, `*' and variable tile sizes, a negative size
   (dg-warning), multi-dimensional tiles, and tile combined with
   gang/worker/vector.  Diagnostics are matched by line.  */
void
|
||||
kern (void)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
#pragma acc loop tile // { dg-error "expected" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile() // { dg-error "expected" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(1)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(2)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(6-2)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(6+2)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(*, 1)
|
||||
for (i = 0; i < 10; i++)
|
||||
{
|
||||
for (j = 0; j < 10; i++) // NOTE(review): increments i, not j -- looks like a typo; confirm before changing
|
||||
{ }
|
||||
}
|
||||
#pragma acc loop tile(-2) // { dg-warning "'tile' value must be positive" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(i)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop tile(2, 2, 1)
|
||||
for (i = 2; i < 4; i++)
|
||||
for (i = 4; i < 6; i++) // NOTE(review): inner loop reuses i -- presumably meant j; confirm
|
||||
{ }
|
||||
#pragma acc loop tile(2, 2)
|
||||
for (i = 1; i < 5; i+=2)
|
||||
for (j = i+1; j < 7; i++) // NOTE(review): increments i, not j -- looks like a typo; confirm before changing
|
||||
{ }
|
||||
#pragma acc loop vector tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop vector gang tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop vector worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc loop gang worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void k3 (void)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
#pragma acc kernels loop tile // { dg-error "expected" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop tile() // { dg-error "expected" }
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop tile(1)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop tile(*, 1)
|
||||
for (i = 0; i < 10; i++)
|
||||
{
|
||||
for (j = 1; j < 10; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc kernels loop tile(-2) // { dg-warning "'tile' value must be positive" }
|
||||
for (i = 1; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop tile(i)
|
||||
for (i = 1; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop tile(2, 2, 1)
|
||||
for (i = 1; i < 3; i++)
|
||||
{
|
||||
for (j = 4; j < 6; j++)
|
||||
{ }
|
||||
}
|
||||
#pragma acc kernels loop tile(2, 2)
|
||||
for (i = 1; i < 5; i++)
|
||||
{
|
||||
for (j = i + 1; j < 7; j += i)
|
||||
{ }
|
||||
}
|
||||
#pragma acc kernels loop vector tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop gang tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop vector gang tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop vector worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
#pragma acc kernels loop gang worker tile(*)
|
||||
for (i = 0; i < 10; i++)
|
||||
{ }
|
||||
}
|
||||
|
39
gcc/testsuite/g++.dg/goacc/reference.C
Normal file
39
gcc/testsuite/g++.dg/goacc/reference.C
Normal file
@ -0,0 +1,39 @@
|
||||
/* Store through a reference parameter that is mapped with a `copy'
   clause on a kernels construct.

   REF is the caller's object; it is set to 10 inside the region.
   Returns the value of REF after the region.  The explicit return keeps
   this non-void function from flowing off its end, which would be
   undefined behavior in C++ if the function were ever called.  */
int
test1 (int &ref)
{
#pragma acc kernels copy (ref)
  {
    ref = 10;
  }

  return ref;
}
|
||||
|
||||
/* Read a reference parameter inside kernels and parallel regions without
   naming it in a data clause; only the local B is mapped (copyout).

   REF is read in both regions and finally overwritten with B.
   Returns B, i.e. the original value of REF plus 10.  The explicit
   return keeps this non-void function from flowing off its end, which
   would be undefined behavior in C++ if the function were ever called.  */
int
test2 (int &ref)
{
  int b;

#pragma acc kernels copyout (b)
  {
    b = ref + 10;
  }

#pragma acc parallel copyout (b)
  {
    b = ref + 10;
  }

  ref = b;

  return b;
}
|
||||
|
||||
/* Name both a variable and a reference bound to that same variable in a
   single `copy' clause of a parallel construct, then assign through the
   reference inside the region.  The final value of `a' is returned as
   the exit status.  */
int
|
||||
main()
|
||||
{
|
||||
int a = 0;
|
||||
int &ref_a = a;
|
||||
|
||||
#pragma acc parallel copy (a, ref_a)
|
||||
{
|
||||
ref_a = 5;
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
13
gcc/testsuite/g++.dg/goacc/routine-1.C
Normal file
13
gcc/testsuite/g++.dg/goacc/routine-1.C
Normal file
@ -0,0 +1,13 @@
|
||||
/* Test valid use of the routine directive. */
|
||||
|
||||
/* Namespace-scope uses of the routine directive: foo1 is named by a
   directive inside the namespace, and foo3 is defined directly after an
   unnamed directive.  foo2 is only declared here.  */
namespace N
|
||||
{
|
||||
extern void foo1();
|
||||
extern void foo2();
|
||||
#pragma acc routine (foo1)
|
||||
#pragma acc routine
|
||||
void foo3()
|
||||
{
|
||||
}
|
||||
}
|
||||
#pragma acc routine (N::foo2)
|
42
gcc/testsuite/g++.dg/goacc/routine-2.C
Normal file
42
gcc/testsuite/g++.dg/goacc/routine-2.C
Normal file
@ -0,0 +1,42 @@
|
||||
/* Test invalid use of the routine directive. */
|
||||
|
||||
template <typename T>
|
||||
extern T one_d();
|
||||
#pragma acc routine (one_d) /* { dg-error "names a set of overloads" } */
|
||||
|
||||
/* Function template that returns 1 converted to T; it exists so that a
   named routine directive can be applied to a template name.  */
template <typename T>
|
||||
T
|
||||
one()
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
#pragma acc routine (one) /* { dg-error "names a set of overloads" } */
|
||||
|
||||
int incr (int);
|
||||
float incr (float);
|
||||
int inc;
|
||||
|
||||
#pragma acc routine (incr) /* { dg-error "names a set of overloads" } */
|
||||
|
||||
#pragma acc routine (increment) /* { dg-error "has not been declared" } */
|
||||
|
||||
#pragma acc routine (inc) /* { dg-error "does not refer to a function" } */
|
||||
|
||||
#pragma acc routine (+) /* { dg-error "expected unqualified-id before '.' token" } */
|
||||
|
||||
int sum (int, int);
|
||||
|
||||
namespace foo {
|
||||
#pragma acc routine (sum)
|
||||
int sub (int, int);
|
||||
}
|
||||
|
||||
#pragma acc routine (foo::sub)
|
||||
|
||||
/* It's strange to apply a routine directive to a subset of overloaded
|
||||
functions, but that is permissible in OpenACC 2.x. */
|
||||
|
||||
int decr (int a);
|
||||
|
||||
#pragma acc routine
|
||||
float decr (float a);
|
@ -1,8 +1,3 @@
|
||||
// This error is temporary. Remove when support is added for these clauses
|
||||
// in the middle end. Also remove the comments from the reduction test
|
||||
// after the FE learns that reduction variables may appear in data clauses too.
|
||||
// { dg-prune-output "sorry, unimplemented" }
|
||||
|
||||
#pragma acc routine
|
||||
template <typename T> T
|
||||
accDouble(int val)
|
||||
@ -20,55 +15,62 @@ oacc_parallel_copy (T a)
|
||||
double z = 4;
|
||||
|
||||
#pragma acc parallel num_gangs (a) num_workers (a) vector_length (a) default (none) copyout (b) copyin (a)
|
||||
{
|
||||
#pragma acc loop gang worker vector
|
||||
for (int i = 0; i < 1; i++)
|
||||
b = a;
|
||||
}
|
||||
|
||||
#pragma acc parallel num_gangs (a) copy (w, x, y, z)
|
||||
{
|
||||
w = accDouble<char>(w);
|
||||
x = accDouble<int>(x);
|
||||
y = accDouble<float>(y);
|
||||
z = accDouble<double>(z);
|
||||
}
|
||||
#pragma acc loop
|
||||
for (int i = 0; i < 1; i++)
|
||||
{
|
||||
w = accDouble<char>(w);
|
||||
x = accDouble<int>(x);
|
||||
y = accDouble<float>(y);
|
||||
z = accDouble<double>(z);
|
||||
}
|
||||
|
||||
#pragma acc parallel num_gangs (a) if (1)
|
||||
{
|
||||
#pragma acc loop independent collapse (2) gang
|
||||
for (int i = 0; i < a; i++)
|
||||
for (int j = 0; j < 5; j++)
|
||||
b = a;
|
||||
|
||||
#pragma acc loop auto tile (a, 3)
|
||||
for (int i = 0; i < a; i++)
|
||||
for (int j = 0; j < 5; j++)
|
||||
b = a;
|
||||
for (int i = 0; i < a; i++)
|
||||
for (int j = 0; j < 5; j++)
|
||||
b = a;
|
||||
|
||||
#pragma acc loop seq
|
||||
for (int i = 0; i < a; i++)
|
||||
b = a;
|
||||
for (int i = 0; i < a; i++)
|
||||
b = a;
|
||||
}
|
||||
|
||||
T c;
|
||||
|
||||
#pragma acc parallel num_workers (10)
|
||||
{
|
||||
#pragma acc loop worker
|
||||
for (int i = 0; i < 1; i++)
|
||||
{
|
||||
#pragma acc atomic capture
|
||||
c = b++;
|
||||
c = b++;
|
||||
|
||||
#pragma atomic update
|
||||
c++;
|
||||
c++;
|
||||
|
||||
#pragma acc atomic read
|
||||
b = a;
|
||||
b = a;
|
||||
|
||||
#pragma acc atomic write
|
||||
b = a;
|
||||
}
|
||||
b = a;
|
||||
}
|
||||
|
||||
//#pragma acc parallel reduction (+:c)
|
||||
// {
|
||||
// c = 1;
|
||||
// }
|
||||
#pragma acc parallel reduction (+:c)
|
||||
c = 1;
|
||||
|
||||
#pragma acc data if (1) copy (b)
|
||||
{
|
||||
#pragma acc parallel
|
||||
#pragma acc parallel
|
||||
{
|
||||
b = a;
|
||||
}
|
||||
@ -76,9 +78,9 @@ oacc_parallel_copy (T a)
|
||||
|
||||
#pragma acc enter data copyin (b)
|
||||
#pragma acc parallel present (b)
|
||||
{
|
||||
b = a;
|
||||
}
|
||||
{
|
||||
b = a;
|
||||
}
|
||||
|
||||
#pragma acc update host (b)
|
||||
#pragma acc update self (b)
|
||||
@ -109,11 +111,9 @@ oacc_kernels_copy (T a)
|
||||
#pragma acc kernels copyout (b) copyin (a)
|
||||
b = a;
|
||||
|
||||
//#pragma acc kernels loop reduction (+:c)
|
||||
// for (int i = 0; i < 10; i++)
|
||||
// {
|
||||
// c = 1;
|
||||
// }
|
||||
#pragma acc kernels loop reduction (+:c)
|
||||
for (int i = 0; i < 10; i++)
|
||||
c = 1;
|
||||
|
||||
#pragma acc data if (1) copy (b)
|
||||
{
|
||||
@ -125,9 +125,10 @@ oacc_kernels_copy (T a)
|
||||
|
||||
#pragma acc enter data copyin (b)
|
||||
#pragma acc kernels present (b)
|
||||
{
|
||||
b = a;
|
||||
}
|
||||
{
|
||||
b = a;
|
||||
}
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
|
@ -1,17 +1,10 @@
|
||||
! Exercise combined OpenACC directives.
|
||||
|
||||
! { dg-do compile }
|
||||
! { dg-options "-fopenacc -fdump-tree-gimple" }
|
||||
|
||||
! This error is temporary. Remove when support is added for these clauses
|
||||
! in the middle end.
|
||||
! { dg-prune-output "sorry, unimplemented" }
|
||||
|
||||
! Update the reduction tests.
|
||||
! { dg-additional-options "-fdump-tree-gimple" }
|
||||
|
||||
subroutine test
|
||||
implicit none
|
||||
integer a(100), i, j, z
|
||||
integer a(100), i, j, y, z
|
||||
|
||||
! PARALLEL
|
||||
|
||||
@ -73,10 +66,10 @@ subroutine test
|
||||
end do
|
||||
!$acc end parallel loop
|
||||
|
||||
! !$acc parallel loop reduction (+:z) copy (z)
|
||||
! do i = 1, 100
|
||||
! end do
|
||||
! !$acc end parallel loop
|
||||
!$acc parallel loop reduction (+:y) copy (y)
|
||||
do i = 1, 100
|
||||
end do
|
||||
!$acc end parallel loop
|
||||
|
||||
! KERNELS
|
||||
|
||||
@ -138,10 +131,10 @@ subroutine test
|
||||
end do
|
||||
!$acc end kernels loop
|
||||
|
||||
! !$acc kernels loop reduction (+:z) copy (z)
|
||||
! do i = 1, 100
|
||||
! end do
|
||||
! !$acc end kernels loop
|
||||
!$acc kernels loop reduction (+:y) copy (y)
|
||||
do i = 1, 100
|
||||
end do
|
||||
!$acc end kernels loop
|
||||
end subroutine test
|
||||
|
||||
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. collapse.2." 2 "gimple" } }
|
||||
@ -153,3 +146,5 @@ end subroutine test
|
||||
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
|
||||
! { dg-final { scan-tree-dump-times "acc loop private.i. reduction..:y." 2 "gimple" } }
|
||||
|
@ -1,5 +1,3 @@
|
||||
! { dg-do compile }
|
||||
! { dg-additional-options "-fmax-errors=100" }
|
||||
module test
|
||||
implicit none
|
||||
contains
|
||||
@ -29,14 +27,18 @@ subroutine test1
|
||||
i = i + 1
|
||||
end do
|
||||
!$acc loop
|
||||
do 300 d = 1, 30, 6 ! { dg-error "integer" }
|
||||
do 300 d = 1, 30, 6
|
||||
i = d
|
||||
300 a(i) = 1
|
||||
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
|
||||
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
|
||||
!$acc loop
|
||||
do d = 1, 30, 5 ! { dg-error "integer" }
|
||||
do d = 1, 30, 5
|
||||
i = d
|
||||
a(i) = 2
|
||||
end do
|
||||
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
|
||||
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
|
||||
!$acc loop
|
||||
do i = 1, 30
|
||||
if (i .eq. 16) exit ! { dg-error "EXIT statement" }
|
||||
@ -144,8 +146,10 @@ subroutine test1
|
||||
end do
|
||||
!$acc parallel loop collapse(2)
|
||||
do i = 1, 3
|
||||
do r = 4, 6 ! { dg-error "integer" }
|
||||
do r = 4, 6
|
||||
end do
|
||||
! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
|
||||
! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
|
||||
end do
|
||||
|
||||
! Both seq and independent are not allowed
|
||||
@ -167,4 +171,3 @@ subroutine test1
|
||||
|
||||
end subroutine test1
|
||||
end module test
|
||||
! { dg-prune-output "Deleted" }
|
||||
|
@ -1,9 +1,3 @@
|
||||
! { dg-do compile }
|
||||
! { dg-additional-options "-fmax-errors=100" }
|
||||
|
||||
! { dg-prune-output "sorry, unimplemented" }
|
||||
! { dg-prune-output "Error: work-sharing region" }
|
||||
|
||||
program test
|
||||
implicit none
|
||||
integer :: i, j
|
||||
|
@ -1,11 +1,3 @@
|
||||
! { dg-do compile }
|
||||
! { dg-additional-options "-fmax-errors=100" }
|
||||
|
||||
! This error is temporary. Remove when support is added for these clauses
|
||||
! in the middle end.
|
||||
! { dg-prune-output "sorry, unimplemented" }
|
||||
! { dg-prune-output "Error: work-sharing region" }
|
||||
|
||||
program test
|
||||
implicit none
|
||||
integer :: i, j
|
||||
|
@ -1,13 +1,7 @@
|
||||
! { dg-do compile }
|
||||
! { dg-additional-options "-fdump-tree-original -std=f2008" }
|
||||
|
||||
! test for tree-dump-original and spaces-commas
|
||||
|
||||
! This error is temporary. Remove when support is added for these clauses
|
||||
! in the middle end.
|
||||
! { dg-prune-output "sorry, unimplemented" }
|
||||
! { dg-prune-output "Error: work-sharing region" }
|
||||
|
||||
program test
|
||||
implicit none
|
||||
integer :: i, j, k, m, sum
|
||||
|
@ -1,3 +1,71 @@
|
||||
2016-03-30 Thomas Schwinge <thomas@codesourcery.com>
|
||||
James Norris <jnorris@codesourcery.com>
|
||||
Nathan Sidwell <nathan@codesourcery.com>
|
||||
Julian Brown <julian@codesourcery.com>
|
||||
Cesar Philippidis <cesar@codesourcery.com>
|
||||
Chung-Lin Tang <cltang@codesourcery.com>
|
||||
Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* testsuite/libgomp.oacc-c-c++-common/clauses-1.c: Update.
|
||||
* testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/if-1.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/vector-loop.c: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/asyncwait-1.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/asyncwait-2.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/asyncwait-3.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/declare-1.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Likewise.
|
||||
XFAIL.
|
||||
* testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c: Update.
|
||||
Incorporate...
|
||||
* testsuite/libgomp.oacc-c-c++-common/firstprivate-2.c: ... this
|
||||
file.
|
||||
* testsuite/libgomp.oacc-c++/template-reduction.C: New file.
|
||||
* testsuite/libgomp.oacc-c-c++-common/gang-static-1.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-clauses.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/private-variables.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/routine-1.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/routine-4.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/clauses-1.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/default-1.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/firstprivate-1.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/gang-static-1.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/if-1.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/implicit-firstprivate-ref.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/pr68813.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/private-variables.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-1.c: Merge this
|
||||
file...
|
||||
* testsuite/libgomp.oacc-c-c++-common/parallel-1.c: ..., and this
|
||||
file into...
|
||||
* testsuite/libgomp.oacc-c-c++-common/data-clauses.h: ... this new
|
||||
file. Update.
|
||||
* testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels.c: New
|
||||
file.
|
||||
* testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-2.c: Rename to...
|
||||
* testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c:
|
||||
... this new file. Update.
|
||||
* testsuite/libgomp.oacc-c-c++-common/parallel-2.c: Rename to...
|
||||
* testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c:
|
||||
... this new file. Update.
|
||||
* testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: New
|
||||
file. Incorporate...
|
||||
* testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c: ... this
|
||||
file, and...
|
||||
* testsuite/libgomp.oacc-c-c++-common/worker-single-4.c: ... this
|
||||
file, and...
|
||||
* testsuite/libgomp.oacc-c-c++-common/worker-single-6.c: ... this
|
||||
file.
|
||||
* testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Remove file.
|
||||
|
||||
2016-03-29 Thomas Schwinge <thomas@codesourcery.com>
|
||||
|
||||
* testsuite/libgomp.oacc-c++/c++.exp [!lang_test_file_found]: Call
|
||||
|
98
libgomp/testsuite/libgomp.oacc-c++/template-reduction.C
Normal file
98
libgomp/testsuite/libgomp.oacc-c++/template-reduction.C
Normal file
@ -0,0 +1,98 @@
|
||||
const int n = 100;
|
||||
|
||||
// Check explicit template copy map
|
||||
|
||||
// Explicit-copy variant: returns the sum of array[0] .. array[n-1], where n
// is the file-scope constant.  The sum is accumulated in an OpenACC
// 'parallel loop' with a '+' reduction on s; both s and array[0:n] are
// named explicitly in the copy clause.
template<typename T> T
|
||||
sum (T array[])
|
||||
{
|
||||
T s = 0;
|
||||
|
||||
#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s, array[0:n])
|
||||
for (int i = 0; i < n; i++)
|
||||
s += array[i];
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
// Check implicit template copy map
|
||||
|
||||
// Implicit-mapping variant: fills a local array with 1 .. n on the host and
// returns the sum of its elements.  Only s is named in the copy clause of
// the 'parallel loop'; array appears in no data clause on the directive.
template<typename T> T
|
||||
sum ()
|
||||
{
|
||||
T s = 0;
|
||||
T array[n];
|
||||
|
||||
// Host-side initialization: array[i] = i + 1.
for (int i = 0; i < n; i++)
|
||||
array[i] = i+1;
|
||||
|
||||
#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s)
|
||||
for (int i = 0; i < n; i++)
|
||||
s += array[i];
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
// Check present and async
|
||||
|
||||
// Present + async variant: overwrites array[0:n] with 1 .. n in a first
// 'parallel loop' queued with async (1), then sums the elements in a second
// 'parallel loop' that carries 'async wait (1)', and finally waits for all
// outstanding asynchronous work before returning the sum.
// Both directives use present (array[0:n]), so the caller has to have
// mapped the array beforehand.
template<typename T> T
|
||||
async_sum (T array[])
|
||||
{
|
||||
T s = 0;
|
||||
|
||||
// Producer: writes array[i] = i + 1 on async queue 1.
#pragma acc parallel loop num_gangs (10) gang async (1) present (array[0:n])
|
||||
for (int i = 0; i < n; i++)
|
||||
array[i] = i+1;
|
||||
|
||||
// Consumer: the wait (1) clause orders this reduction after the producer.
#pragma acc parallel loop num_gangs (10) gang reduction (+:s) present (array[0:n]) copy (s) async wait (1)
|
||||
for (int i = 0; i < n; i++)
|
||||
s += array[i];
|
||||
|
||||
// Block until the asynchronous reduction has finished before reading s.
#pragma acc wait
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
// Check present and async and an explicit firstprivate
|
||||
|
||||
// Async + explicit firstprivate variant: returns the sum of (i + c) for
// i in [0, n).  c is passed to the 'parallel loop' through an explicit
// firstprivate clause; the construct carries 'async wait (1)' and is
// followed by a '#pragma acc wait' before s is returned.
template<typename T> T
|
||||
async_sum (int c)
|
||||
{
|
||||
T s = 0;
|
||||
|
||||
#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy(s) firstprivate (c) async wait (1)
|
||||
for (int i = 0; i < n; i++)
|
||||
s += i+c;
|
||||
|
||||
// Block until the asynchronous reduction has finished before reading s.
#pragma acc wait
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
// Test driver: computes the reference value on the host and aborts if any
// of the four template variants returns something different.
int
|
||||
main()
|
||||
{
|
||||
int a[n];
|
||||
int result = 0;
|
||||
|
||||
// Reference data: a = 1 .. n, result = 1 + 2 + ... + n.
for (int i = 0; i < n; i++)
|
||||
{
|
||||
a[i] = i+1;
|
||||
result += i+1;
|
||||
}
|
||||
|
||||
if (sum (a) != result)
|
||||
__builtin_abort ();
|
||||
|
||||
if (sum<int> () != result)
|
||||
__builtin_abort ();
|
||||
|
||||
// Map a before calling async_sum (a), whose directives use present clauses.
#pragma acc enter data copyin (a)
|
||||
if (async_sum (a) != result)
|
||||
__builtin_abort ();
|
||||
|
||||
// With c == 1 the callee sums (i + 1) for i in [0, n), which is the same
// series as result.
if (async_sum<int> (1) != result)
|
||||
__builtin_abort ();
|
||||
#pragma acc exit data delete (a)
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,4 +1,6 @@
|
||||
/* { dg-do run { target openacc_nvidia_accel_selected } } */
|
||||
/* <http://news.gmane.org/find-root.php?message_id=%3C87pp0aaksc.fsf%40kepler.schwinge.homeip.net%3E>.
|
||||
{ dg-xfail-run-if "TODO" { *-*-* } } */
|
||||
/* { dg-additional-options "-lcuda" } */
|
||||
|
||||
#include <openacc.h>
|
||||
@ -444,6 +446,438 @@ main (int argc, char **argv)
|
||||
|
||||
#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
|
||||
|
||||
#pragma acc wait (1)
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 5.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 25.0)
|
||||
abort ();
|
||||
|
||||
if (c[i] != 4.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 3.0;
|
||||
b[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
|
||||
{
|
||||
|
||||
#pragma acc kernels async
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
|
||||
#pragma acc wait
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 3.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 3.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 2.0;
|
||||
b[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
|
||||
{
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
|
||||
#pragma acc wait (1)
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 2.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 2.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 3.0;
|
||||
b[i] = 0.0;
|
||||
c[i] = 0.0;
|
||||
d[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
|
||||
{
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
|
||||
}
|
||||
|
||||
#pragma acc wait (1)
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 3.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 9.0)
|
||||
abort ();
|
||||
|
||||
if (c[i] != 4.0)
|
||||
abort ();
|
||||
|
||||
if (d[i] != 1.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 2.0;
|
||||
b[i] = 0.0;
|
||||
c[i] = 0.0;
|
||||
d[i] = 0.0;
|
||||
e[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
|
||||
{
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
|
||||
}
|
||||
|
||||
#pragma acc kernels wait (1) async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
|
||||
}
|
||||
|
||||
#pragma acc wait (1)
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 2.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 4.0)
|
||||
abort ();
|
||||
|
||||
if (c[i] != 4.0)
|
||||
abort ();
|
||||
|
||||
if (d[i] != 1.0)
|
||||
abort ();
|
||||
|
||||
if (e[i] != 11.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
|
||||
r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
|
||||
if (r != CUDA_SUCCESS)
|
||||
{
|
||||
fprintf (stderr, "cuStreamCreate failed: %d\n", r);
|
||||
abort ();
|
||||
}
|
||||
|
||||
acc_set_cuda_stream (1, stream1);
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 5.0;
|
||||
b[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
|
||||
{
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
|
||||
#pragma acc wait (1)
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 5.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 5.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 7.0;
|
||||
b[i] = 0.0;
|
||||
c[i] = 0.0;
|
||||
d[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
|
||||
{
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
|
||||
}
|
||||
|
||||
#pragma acc wait (1)
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 7.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 49.0)
|
||||
abort ();
|
||||
|
||||
if (c[i] != 4.0)
|
||||
abort ();
|
||||
|
||||
if (d[i] != 1.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 3.0;
|
||||
b[i] = 0.0;
|
||||
c[i] = 0.0;
|
||||
d[i] = 0.0;
|
||||
e[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
|
||||
{
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
|
||||
}
|
||||
|
||||
#pragma acc kernels wait (1) async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
|
||||
}
|
||||
|
||||
#pragma acc wait (1)
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 3.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 9.0)
|
||||
abort ();
|
||||
|
||||
if (c[i] != 4.0)
|
||||
abort ();
|
||||
|
||||
if (d[i] != 1.0)
|
||||
abort ();
|
||||
|
||||
if (e[i] != 17.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 4.0;
|
||||
b[i] = 0.0;
|
||||
c[i] = 0.0;
|
||||
d[i] = 0.0;
|
||||
e[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
|
||||
{
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 4.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 16.0)
|
||||
abort ();
|
||||
|
||||
if (c[i] != 4.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 5.0;
|
||||
b[i] = 0.0;
|
||||
c[i] = 0.0;
|
||||
d[i] = 0.0;
|
||||
e[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
|
||||
{
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
#pragma acc kernels async (1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
|
||||
}
|
||||
|
||||
#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
|
||||
|
||||
#pragma acc wait (1)
|
||||
|
||||
}
|
||||
|
@ -578,6 +578,32 @@ main (int argc, char **argv)
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (acc_is_present (&a[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
if (acc_is_present (&b[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 6.0;
|
||||
b[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc parallel pcopy (a[0:N], b[0:N])
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != 6.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (acc_is_present (&a[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* { dg-do run { target lto } } */
|
||||
/* { dg-additional-options "-fipa-pta -flto -flto-partition=max" } */
|
||||
|
||||
#include "parallel-1.c"
|
||||
#include "data-clauses-kernels.c"
|
@ -0,0 +1,2 @@
|
||||
/* Select the "kernels" construct for the shared data-clause tests.
   NOTE(review): data-clauses.h presumably uses CONSTRUCT in its
   "#pragma acc CONSTRUCT" directives -- confirm against that header.  */
#define CONSTRUCT kernels
|
||||
#include "data-clauses.h"
|
@ -1,4 +1,4 @@
|
||||
/* { dg-do run { target lto } } */
|
||||
/* { dg-additional-options "-fipa-pta -flto -flto-partition=max" } */
|
||||
|
||||
#include "kernels-1.c"
|
||||
#include "data-clauses-parallel.c"
|
@ -0,0 +1,2 @@
|
||||
/* Select the "parallel" construct for the shared data-clause tests.
   NOTE(review): data-clauses.h presumably uses CONSTRUCT in its
   "#pragma acc CONSTRUCT" directives -- confirm against that header.  */
#define CONSTRUCT parallel
|
||||
#include "data-clauses.h"
|
@ -1,7 +1,3 @@
|
||||
/* { dg-do run } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
int i;
|
||||
|
||||
int main(void)
|
||||
@ -11,145 +7,145 @@ int main(void)
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc parallel /* copyout */ present_or_copyout (v) copyin (i, j)
|
||||
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) copyin (i, j)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
v = 1;
|
||||
}
|
||||
#if ACC_MEM_SHARED
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#else
|
||||
if (v != 1 || i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#endif
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc parallel /* copyout */ present_or_copyout (v) copyout (i, j)
|
||||
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) copyout (i, j)
|
||||
{
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc parallel /* copyout */ present_or_copyout (v) copy (i, j)
|
||||
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) copy (i, j)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc parallel /* copyout */ present_or_copyout (v) create (i, j)
|
||||
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) create (i, j)
|
||||
{
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
v = 1;
|
||||
}
|
||||
#if ACC_MEM_SHARED
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#else
|
||||
if (v != 1 || i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#endif
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
|
||||
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#if ACC_MEM_SHARED
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#else
|
||||
if (v != 1 || i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#endif
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
|
||||
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
|
||||
{
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copy (i, j)
|
||||
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_copy (i, j)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_create (i, j)
|
||||
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_create (i, j)
|
||||
{
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#if ACC_MEM_SHARED
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#else
|
||||
if (v != 1 || i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#endif
|
||||
|
||||
i = -1;
|
||||
@ -158,23 +154,23 @@ int main(void)
|
||||
|
||||
#pragma acc data copyin (i, j)
|
||||
{
|
||||
#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j)
|
||||
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present (i, j)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
v = 1;
|
||||
}
|
||||
}
|
||||
#if ACC_MEM_SHARED
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#else
|
||||
if (v != 1 || i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#endif
|
||||
|
||||
i = -1;
|
||||
@ -183,23 +179,23 @@ int main(void)
|
||||
|
||||
#pragma acc data copyin(i, j)
|
||||
{
|
||||
#pragma acc parallel /* copyout */ present_or_copyout (v)
|
||||
#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
v = 1;
|
||||
}
|
||||
}
|
||||
#if ACC_MEM_SHARED
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#else
|
||||
if (v != 1 || i != -1 || j != -2)
|
||||
abort ();
|
||||
__builtin_abort ();
|
||||
#endif
|
||||
|
||||
return 0;
|
@ -1,5 +1,3 @@
|
||||
/* { dg-do run } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
int main (void)
|
||||
@ -16,6 +14,27 @@ int main (void)
|
||||
a_2 = &a;
|
||||
}
|
||||
|
||||
if (a != A)
|
||||
abort ();
|
||||
if (a_1 != a)
|
||||
abort ();
|
||||
#if ACC_MEM_SHARED
|
||||
if (a_2 != &a)
|
||||
abort ();
|
||||
#else
|
||||
if (a_2 == &a)
|
||||
abort ();
|
||||
#endif
|
||||
|
||||
a_1 = a_2 = 0;
|
||||
|
||||
#pragma acc data deviceptr (a)
|
||||
#pragma acc parallel copyout (a_1, a_2)
|
||||
{
|
||||
a_1 = a;
|
||||
a_2 = &a;
|
||||
}
|
||||
|
||||
if (a != A)
|
||||
abort ();
|
||||
if (a_1 != a)
|
||||
|
@ -1,8 +1,7 @@
|
||||
/* { dg-do run } */
|
||||
|
||||
#include <openacc.h>
|
||||
|
||||
int main ()
|
||||
|
||||
void t1 ()
|
||||
{
|
||||
int ok = 1;
|
||||
int val = 2;
|
||||
@ -28,14 +27,115 @@ int main ()
|
||||
if (ondev)
|
||||
{
|
||||
if (!ok)
|
||||
return 1;
|
||||
__builtin_abort ();
|
||||
if (val != 2)
|
||||
return 1;
|
||||
__builtin_abort ();
|
||||
|
||||
for (int i = 0; i < 32; i++)
|
||||
if (ary[i] != 2 + i)
|
||||
return 1;
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Check firstprivate on a variable that is already mapped by an enclosing
   data region: the first parallel region updates the mapped val to 7, the
   second one takes val as firstprivate, records in ok whether it saw 7, and
   then overwrites its copy with 9.  After the data region the test expects
   ok to be set and val to be 7, i.e. the write of 9 must not be copied
   back.  Aborts on failure.  */
void t2 ()
|
||||
{
|
||||
int ok = 1;
|
||||
int val = 2;
|
||||
|
||||
#pragma acc data copy(val)
|
||||
{
|
||||
#pragma acc parallel present (val)
|
||||
{
|
||||
val = 7;
|
||||
}
|
||||
|
||||
#pragma acc parallel firstprivate (val) copy(ok)
|
||||
{
|
||||
ok = val == 7;
|
||||
val = 9;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ok)
|
||||
__builtin_abort ();
|
||||
if (val != 7)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
|
||||
/* N is the capacity of b[] in t3; it is undefined again right after the
   function.  */
#define N 100
|
||||
/* Check firstprivate in two situations:
   1. a gang-partitioned loop in which every iteration modifies its
      firstprivate copy of a and stores it in b[i];
   2. a firstprivate (a) region nested in a "data copy (a)" region, where
      writing the private copy must not change the mapped a.
   Aborts on failure.  */
void t3 ()
|
||||
{
|
||||
int a, b[N], c, d, i;
|
||||
/* Use N iterations/gangs on an nvidia device and a single one otherwise.
   NOTE(review): presumably because the check below only holds when each
   iteration runs in its own gang -- confirm.  */
int n = acc_get_device_type () == acc_device_nvidia ? N : 1;
|
||||
|
||||
a = 5;
|
||||
for (i = 0; i < n; i++)
|
||||
b[i] = -1;
|
||||
|
||||
#pragma acc parallel num_gangs (n) firstprivate (a)
|
||||
#pragma acc loop gang
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
a = a + i;
|
||||
b[i] = a;
|
||||
}
|
||||
|
||||
/* Compare against the host a, which the test expects to be unchanged:
   every b[i] must equal the original a plus i.  */
for (i = 0; i < n; i++)
|
||||
if (a + i != b[i])
|
||||
__builtin_abort ();
|
||||
|
||||
#pragma acc data copy (a)
|
||||
{
|
||||
#pragma acc parallel firstprivate (a) copyout (c)
|
||||
{
|
||||
a = 10;
|
||||
c = a;
|
||||
}
|
||||
|
||||
/* This version of 'a' should still be 5. */
|
||||
#pragma acc parallel copyout (d) present (a)
|
||||
{
|
||||
d = a;
|
||||
}
|
||||
}
|
||||
|
||||
/* c captured the private copy (10); d captured the mapped copy (5).  */
if (c != 10)
|
||||
__builtin_abort ();
|
||||
if (d != 5)
|
||||
__builtin_abort ();
|
||||
}
|
||||
#undef N
|
||||
|
||||
|
||||
/* Check that a firstprivate scalar is readable inside a gang-partitioned
   loop when explicit num_gangs/num_workers/vector_length are given: every
   element of arr starts at 3, has x (5) added on the device, and must be 8
   afterwards.  Aborts on failure.  */
void t4 ()
|
||||
{
|
||||
int x = 5, i, arr[32];
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
arr[i] = 3;
|
||||
|
||||
#pragma acc parallel firstprivate(x) copy(arr) num_gangs(32) num_workers(8) vector_length(32)
|
||||
{
|
||||
#pragma acc loop gang
|
||||
for (i = 0; i < 32; i++)
|
||||
arr[i] += x;
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
if (arr[i] != 8)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
|
||||
/* Run the firstprivate sub-tests t1 .. t4 in order.  Each sub-test aborts
   on failure, so reaching the return statement means all of them passed.  */
int
|
||||
main()
|
||||
{
|
||||
t1 ();
|
||||
t2 ();
|
||||
t3 ();
|
||||
t4 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,31 +0,0 @@
|
||||
/* { dg-do run } */
|
||||
|
||||
#include <openacc.h>
|
||||
|
||||
/* Check firstprivate on a variable that is already mapped by an enclosing
   data region: the first parallel region sets the mapped val to 7, the
   second takes val as firstprivate, records in ok whether it saw 7, and
   overwrites its copy with 9.  Returns 1 if ok is clear or val is not 7
   after the data region, 0 otherwise.  */
int main ()
|
||||
{
|
||||
int ok = 1;
|
||||
int val = 2;
|
||||
|
||||
#pragma acc data copy(val)
|
||||
{
|
||||
#pragma acc parallel present (val)
|
||||
{
|
||||
val = 7;
|
||||
}
|
||||
|
||||
#pragma acc parallel firstprivate (val) copy(ok)
|
||||
{
|
||||
ok = val == 7;
|
||||
val = 9;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!ok)
|
||||
return 1;
|
||||
if(val != 7)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
48
libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-1.c
Normal file
48
libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-1.c
Normal file
@ -0,0 +1,48 @@
|
||||
#include <assert.h>
|
||||
|
||||
#define N 100
|
||||
|
||||
void
|
||||
test (int *a, int *b, int sarg)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
assert (a[i] == b[i] + sarg);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int a[N], b[N];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i] = i+1;
|
||||
|
||||
#pragma acc parallel loop gang (static:*) num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = b[i] + 0;
|
||||
|
||||
test (a, b, 0);
|
||||
|
||||
#pragma acc parallel loop gang (static:1) num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = b[i] + 1;
|
||||
|
||||
test (a, b, 1);
|
||||
|
||||
#pragma acc parallel loop gang (static:5) num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = b[i] + 5;
|
||||
|
||||
test (a, b, 5);
|
||||
|
||||
#pragma acc parallel loop gang (static:20) num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = b[i] + 20;
|
||||
|
||||
test (a, b, 20);
|
||||
|
||||
return 0;
|
||||
}
|
100
libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
Normal file
100
libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
Normal file
@ -0,0 +1,100 @@
|
||||
/* { dg-do run { target openacc_nvidia_accel_selected } } */
|
||||
/* This code uses nvptx inline assembly guarded with acc_on_device, which is
|
||||
not optimized away at -O0, and then confuses the target assembler.
|
||||
{ dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
|
||||
|
||||
#include <assert.h>
|
||||
#include <openacc.h>
|
||||
|
||||
#define N 100
|
||||
|
||||
#define GANG_ID(I) \
|
||||
(acc_on_device (acc_device_nvidia) \
|
||||
? ({unsigned __r; \
|
||||
__asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (__r)); \
|
||||
__r; }) : (I))
|
||||
|
||||
int
|
||||
test_static(int *a, int num_gangs, int sarg)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
if (sarg == 0)
|
||||
sarg = 1;
|
||||
|
||||
for (i = 0; i < N / sarg; i++)
|
||||
for (j = 0; j < sarg; j++)
|
||||
assert (a[i*sarg+j] == i % num_gangs);
|
||||
}
|
||||
|
||||
int
|
||||
test_nonstatic(int *a, int gangs)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < N; i+=gangs)
|
||||
for (j = 0; j < gangs; j++)
|
||||
assert (a[i+j] == i/gangs);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int a[N];
|
||||
int i, x;
|
||||
|
||||
#pragma acc parallel loop gang (static:*) num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = GANG_ID (i);
|
||||
|
||||
test_nonstatic (a, 10);
|
||||
|
||||
#pragma acc parallel loop gang (static:1) num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = GANG_ID (i);
|
||||
|
||||
test_static (a, 10, 1);
|
||||
|
||||
#pragma acc parallel loop gang (static:2) num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = GANG_ID (i);
|
||||
|
||||
test_static (a, 10, 2);
|
||||
|
||||
#pragma acc parallel loop gang (static:5) num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = GANG_ID (i);
|
||||
|
||||
test_static (a, 10, 5);
|
||||
|
||||
#pragma acc parallel loop gang (static:20) num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = GANG_ID (i);
|
||||
|
||||
test_static (a, 10, 20);
|
||||
|
||||
/* Non-static gang. */
|
||||
#pragma acc parallel loop gang num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = GANG_ID (i);
|
||||
|
||||
test_nonstatic (a, 10);
|
||||
|
||||
/* Static arguments with a variable expression. */
|
||||
|
||||
x = 20;
|
||||
#pragma acc parallel loop gang (static:0+x) num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = GANG_ID (i);
|
||||
|
||||
test_static (a, 10, 20);
|
||||
|
||||
x = 20;
|
||||
#pragma acc parallel loop gang (static:x) num_gangs (10)
|
||||
for (i = 0; i < 100; i++)
|
||||
a[i] = GANG_ID (i);
|
||||
|
||||
test_static (a, 10, 20);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,5 +1,3 @@
|
||||
/* { dg-do run } */
|
||||
|
||||
#include <openacc.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
@ -608,5 +606,357 @@ main(int argc, char **argv)
|
||||
abort ();
|
||||
#endif
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 4.0;
|
||||
|
||||
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 5.0;
|
||||
#else
|
||||
exp = 4.0;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != exp)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 16.0;
|
||||
|
||||
#pragma acc kernels if(0)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != 17.0)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 8.0;
|
||||
|
||||
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 9.0;
|
||||
#else
|
||||
exp = 8.0;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != exp)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 22.0;
|
||||
|
||||
#pragma acc kernels if(zero)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != 23.0)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 16.0;
|
||||
|
||||
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(true)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 17.0;
|
||||
#else
|
||||
exp = 16.0;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != exp)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 76.0;
|
||||
|
||||
#pragma acc kernels if(false)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != 77.0)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 22.0;
|
||||
|
||||
n = 1;
|
||||
|
||||
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 23.0;
|
||||
#else
|
||||
exp = 22.0;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != exp)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 18.0;
|
||||
|
||||
n = 0;
|
||||
|
||||
#pragma acc kernels if(n)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != 19.0)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 49.0;
|
||||
|
||||
n = 1;
|
||||
|
||||
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n + n)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 50.0;
|
||||
#else
|
||||
exp = 49.0;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != exp)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 38.0;
|
||||
|
||||
n = 0;
|
||||
|
||||
#pragma acc kernels if(n + n)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != 39.0)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 91.0;
|
||||
|
||||
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(-2)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 92.0;
|
||||
#else
|
||||
exp = 91.0;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != exp)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 43.0;
|
||||
|
||||
#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one == 1)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 44.0;
|
||||
#else
|
||||
exp = 43.0;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != exp)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 87.0;
|
||||
|
||||
#pragma acc kernels if(one == 0)
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
b[ii] = a[ii] + 1;
|
||||
else
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (b[i] != 88.0)
|
||||
abort();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 3.0;
|
||||
b[i] = 9.0;
|
||||
}
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 0.0;
|
||||
exp2 = 0.0;
|
||||
#else
|
||||
acc_map_data (a, d_a, N * sizeof (float));
|
||||
acc_map_data (b, d_b, N * sizeof (float));
|
||||
exp = 3.0;
|
||||
exp2 = 9.0;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,184 +0,0 @@
|
||||
/* { dg-do run } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
int i;
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int j, v;
|
||||
|
||||
#if 0
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc kernels /* copyout */ present_or_copyout (v) copyin (i, j)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != -1 || j != -2)
|
||||
abort ();
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc kernels /* copyout */ present_or_copyout (v) copyout (i, j)
|
||||
{
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc kernels /* copyout */ present_or_copyout (v) copy (i, j)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc kernels /* copyout */ present_or_copyout (v) create (i, j)
|
||||
{
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != -1 || j != -2)
|
||||
abort ();
|
||||
#endif
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1)
|
||||
abort ();
|
||||
#if ACC_MEM_SHARED
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
#else
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
#endif
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
|
||||
{
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copy (i, j)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_create (i, j)
|
||||
{
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1)
|
||||
abort ();
|
||||
#if ACC_MEM_SHARED
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
#else
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc kernels /* copyout */ present_or_copyout (v) present (i, j)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
i = -1;
|
||||
j = -2;
|
||||
v = 0;
|
||||
#pragma acc kernels /* copyout */ present_or_copyout (v)
|
||||
{
|
||||
if (i != -1 || j != -2)
|
||||
abort ();
|
||||
i = 2;
|
||||
j = 1;
|
||||
if (i != 2 || j != 1)
|
||||
abort ();
|
||||
v = 1;
|
||||
}
|
||||
if (v != 1 || i != 2 || j != 1)
|
||||
abort ();
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,62 @@
|
||||
/* Exercise the auto, independent and seq loop clauses inside
|
||||
kernels regions. */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#define N 100
|
||||
|
||||
void
|
||||
check (int *a, int *b)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
assert (a[i] == b[i]);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i, a[N], b[N];
|
||||
|
||||
#pragma acc kernels copy(a)
|
||||
{
|
||||
#pragma acc loop auto
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = i;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i] = i;
|
||||
|
||||
check (a, b);
|
||||
|
||||
#pragma acc kernels copyout(a)
|
||||
{
|
||||
#pragma acc loop independent
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = i;
|
||||
}
|
||||
|
||||
check (a, b);
|
||||
|
||||
#pragma acc kernels present_or_copy(a)
|
||||
{
|
||||
#pragma acc loop seq
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = i;
|
||||
}
|
||||
|
||||
check (a, b);
|
||||
|
||||
#pragma acc kernels pcopyout(a) present_or_copyin(b)
|
||||
{
|
||||
#pragma acc loop seq
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = b[i];
|
||||
}
|
||||
|
||||
check (a, b);
|
||||
|
||||
return 0;
|
||||
}
|
895
libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c
Normal file
895
libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c
Normal file
@ -0,0 +1,895 @@
|
||||
/* Miscellaneous test cases for gang/worker/vector mode transitions. */
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <openacc.h>
|
||||
|
||||
|
||||
/* Test basic vector-partitioned mode transitions. */
|
||||
|
||||
void t1()
|
||||
{
|
||||
int n = 0, arr[32], i;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
arr[i] = 0;
|
||||
|
||||
#pragma acc parallel copy(n, arr) \
|
||||
num_gangs(1) num_workers(1) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
n++;
|
||||
#pragma acc loop vector
|
||||
for (j = 0; j < 32; j++)
|
||||
arr[j]++;
|
||||
n++;
|
||||
}
|
||||
|
||||
assert (n == 2);
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (arr[i] == 1);
|
||||
}
|
||||
|
||||
|
||||
/* Test vector-partitioned, gang-partitioned mode. */
|
||||
|
||||
void t2()
|
||||
{
|
||||
int n[32], arr[1024], i;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = 0;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
n[i] = 0;
|
||||
|
||||
#pragma acc parallel copy(n, arr) \
|
||||
num_gangs(32) num_workers(1) vector_length(32)
|
||||
{
|
||||
int j, k;
|
||||
|
||||
#pragma acc loop gang(static:*)
|
||||
for (j = 0; j < 32; j++)
|
||||
n[j]++;
|
||||
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
#pragma acc loop vector
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[j * 32 + k]++;
|
||||
|
||||
#pragma acc loop gang(static:*)
|
||||
for (j = 0; j < 32; j++)
|
||||
n[j]++;
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (n[i] == 2);
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
assert (arr[i] == 1);
|
||||
}
|
||||
|
||||
|
||||
/* Test conditions inside vector-partitioned loops. */
|
||||
|
||||
void t4()
|
||||
{
|
||||
int n[32], arr[1024], i;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = i;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
n[i] = 0;
|
||||
|
||||
#pragma acc parallel copy(n, arr) \
|
||||
num_gangs(32) num_workers(1) vector_length(32)
|
||||
{
|
||||
int j, k;
|
||||
|
||||
#pragma acc loop gang(static:*)
|
||||
for (j = 0; j < 32; j++)
|
||||
n[j]++;
|
||||
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
#pragma acc loop vector
|
||||
for (k = 0; k < 32; k++)
|
||||
if ((arr[j * 32 + k] % 2) != 0)
|
||||
arr[j * 32 + k] *= 2;
|
||||
}
|
||||
|
||||
#pragma acc loop gang(static:*)
|
||||
for (j = 0; j < 32; j++)
|
||||
n[j]++;
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (n[i] == 2);
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
assert (arr[i] == ((i % 2) == 0 ? i : i * 2));
|
||||
}
|
||||
|
||||
|
||||
/* Test conditions inside gang-partitioned/vector-partitioned loops. */
|
||||
|
||||
void t5()
|
||||
{
|
||||
int n[32], arr[1024], i;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = i;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
n[i] = 0;
|
||||
|
||||
#pragma acc parallel copy(n, arr) \
|
||||
num_gangs(32) num_workers(1) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
|
||||
#pragma acc loop gang(static:*)
|
||||
for (j = 0; j < 32; j++)
|
||||
n[j]++;
|
||||
|
||||
#pragma acc loop gang vector
|
||||
for (j = 0; j < 1024; j++)
|
||||
if ((arr[j] % 2) != 0)
|
||||
arr[j] *= 2;
|
||||
|
||||
#pragma acc loop gang(static:*)
|
||||
for (j = 0; j < 32; j++)
|
||||
n[j]++;
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (n[i] == 2);
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
assert (arr[i] == ((i % 2) == 0 ? i : i * 2));
|
||||
}
|
||||
|
||||
|
||||
/* Test trivial operation of vector-single mode. */
|
||||
|
||||
/* t7: run a parallel region requested with num_gangs(1), num_workers(1) and
   vector_length(32) whose body increments N once, then check on the host
   that N is exactly 1 afterwards.  */
void t7()
|
||||
{
|
||||
int n = 0;
|
||||
#pragma acc parallel copy(n) \
|
||||
num_gangs(1) num_workers(1) vector_length(32)
|
||||
{
|
||||
n++;
|
||||
}
|
||||
/* N is listed in the copy clause, so the host reads the region's result
   here; any value other than 1 means the increment was lost or repeated.  */
|
||||
assert (n == 1);
|
||||
}
|
||||
|
||||
|
||||
/* Test vector-single, gang-partitioned mode. */
|
||||
|
||||
void t8()
|
||||
{
|
||||
int arr[1024];
|
||||
int gangs;
|
||||
|
||||
for (gangs = 1; gangs <= 1024; gangs <<= 1)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = 0;
|
||||
|
||||
#pragma acc parallel copy(arr) \
|
||||
num_gangs(gangs) num_workers(1) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 1024; j++)
|
||||
arr[j]++;
|
||||
}
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
assert (arr[i] == 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Test conditions in vector-single mode.

   For every power-of-two gang count from 1 to 1024, zero a 1024-element
   array, run a gang-partitioned loop that adds 1 to elements whose index is
   a multiple of 3 and 2 to all others, and verify the result on the host.  */

void t9()
{
  int arr[1024];
  int gangs;

  for (gangs = 1; gangs <= 1024; gangs <<= 1)
    {
      int i;

      for (i = 0; i < 1024; i++)
        arr[i] = 0;

#pragma acc parallel copy(arr) \
                     num_gangs(gangs) num_workers(1) vector_length(32)
      {
        int j;
#pragma acc loop gang
        for (j = 0; j < 1024; j++)
          if ((j % 3) == 0)
            arr[j]++;
          else
            arr[j] += 2;
      }

      /* The conditional must be parenthesised as a whole: '==' binds tighter
         than '?:', so without the extra parentheses the assertion would
         evaluate to the constant 1 or 2 and could never fail.  */
      for (i = 0; i < 1024; i++)
        assert (arr[i] == (((i % 3) == 0) ? 1 : 2));
    }
}
|
||||
|
||||
|
||||
/* Test switch in vector-single mode. */
|
||||
|
||||
void t10()
|
||||
{
|
||||
int arr[1024];
|
||||
int gangs;
|
||||
|
||||
for (gangs = 1; gangs <= 1024; gangs <<= 1)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = 0;
|
||||
|
||||
#pragma acc parallel copy(arr) \
|
||||
num_gangs(gangs) num_workers(1) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 1024; j++)
|
||||
switch (j % 5)
|
||||
{
|
||||
case 0: arr[j] += 1; break;
|
||||
case 1: arr[j] += 2; break;
|
||||
case 2: arr[j] += 3; break;
|
||||
case 3: arr[j] += 4; break;
|
||||
case 4: arr[j] += 5; break;
|
||||
default: arr[j] += 99;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
assert (arr[i] == (i % 5) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Test switch in vector-single mode, initialise array on device. */
|
||||
|
||||
void t11()
|
||||
{
|
||||
int arr[1024];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
arr[i] = 99;
|
||||
|
||||
#pragma acc parallel copy(arr) \
|
||||
num_gangs(1024) num_workers(1) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
|
||||
/* This loop and the one following must be distributed to available gangs
|
||||
in the same way to ensure data dependencies are not violated (hence the
|
||||
"static" clauses). */
|
||||
#pragma acc loop gang(static:*)
|
||||
for (j = 0; j < 1024; j++)
|
||||
arr[j] = 0;
|
||||
|
||||
#pragma acc loop gang(static:*)
|
||||
for (j = 0; j < 1024; j++)
|
||||
switch (j % 5)
|
||||
{
|
||||
case 0: arr[j] += 1; break;
|
||||
case 1: arr[j] += 2; break;
|
||||
case 2: arr[j] += 3; break;
|
||||
case 3: arr[j] += 4; break;
|
||||
case 4: arr[j] += 5; break;
|
||||
default: arr[j] += 99;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 1024; i++)
|
||||
assert (arr[i] == (i % 5) + 1);
|
||||
}
|
||||
|
||||
|
||||
/* Test multiple conditions in vector-single mode. */
|
||||
|
||||
#define NUM_GANGS 4096
|
||||
void t12()
|
||||
{
|
||||
bool fizz[NUM_GANGS], buzz[NUM_GANGS], fizzbuzz[NUM_GANGS];
|
||||
int i;
|
||||
|
||||
#pragma acc parallel copyout(fizz, buzz, fizzbuzz) \
|
||||
num_gangs(NUM_GANGS) num_workers(1) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
|
||||
/* This loop and the one following must be distributed to available gangs
|
||||
in the same way to ensure data dependencies are not violated (hence the
|
||||
"static" clauses). */
|
||||
#pragma acc loop gang(static:*)
|
||||
for (j = 0; j < NUM_GANGS; j++)
|
||||
fizz[j] = buzz[j] = fizzbuzz[j] = 0;
|
||||
|
||||
#pragma acc loop gang(static:*)
|
||||
for (j = 0; j < NUM_GANGS; j++)
|
||||
{
|
||||
if ((j % 3) == 0 && (j % 5) == 0)
|
||||
fizzbuzz[j] = 1;
|
||||
else
|
||||
{
|
||||
if ((j % 3) == 0)
|
||||
fizz[j] = 1;
|
||||
else if ((j % 5) == 0)
|
||||
buzz[j] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM_GANGS; i++)
|
||||
{
|
||||
assert (fizzbuzz[i] == ((i % 3) == 0 && (i % 5) == 0));
|
||||
assert (fizz[i] == ((i % 3) == 0 && (i % 5) != 0));
|
||||
assert (buzz[i] == ((i % 3) != 0 && (i % 5) == 0));
|
||||
}
|
||||
}
|
||||
#undef NUM_GANGS
|
||||
|
||||
|
||||
/* Test worker-partitioned/vector-single mode. */
|
||||
|
||||
void t13()
|
||||
{
|
||||
int arr[32 * 8], i;
|
||||
|
||||
for (i = 0; i < 32 * 8; i++)
|
||||
arr[i] = 0;
|
||||
|
||||
#pragma acc parallel copy(arr) \
|
||||
num_gangs(8) num_workers(8) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
#pragma acc loop worker
|
||||
for (k = 0; k < 8; k++)
|
||||
arr[j * 8 + k] += j * 8 + k;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32 * 8; i++)
|
||||
assert (arr[i] == i);
|
||||
}
|
||||
|
||||
|
||||
/* Test worker-single/worker-partitioned transitions. */
|
||||
|
||||
void t16()
|
||||
{
|
||||
int n[32], arr[32 * 32], i;
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
arr[i] = 0;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
n[i] = 0;
|
||||
|
||||
#pragma acc parallel copy(n, arr) \
|
||||
num_gangs(8) num_workers(16) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
|
||||
n[j]++;
|
||||
|
||||
#pragma acc loop worker
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[j * 32 + k]++;
|
||||
|
||||
n[j]++;
|
||||
|
||||
#pragma acc loop worker
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[j * 32 + k]++;
|
||||
|
||||
n[j]++;
|
||||
|
||||
#pragma acc loop worker
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[j * 32 + k]++;
|
||||
|
||||
n[j]++;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (n[i] == 4);
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
assert (arr[i] == 3);
|
||||
}
|
||||
|
||||
|
||||
/* Test correct synchronisation between worker-partitioned loops. */
|
||||
|
||||
void t17()
|
||||
{
|
||||
int arr_a[32 * 32], arr_b[32 * 32], i;
|
||||
int num_workers, num_gangs;
|
||||
|
||||
for (num_workers = 1; num_workers <= 32; num_workers <<= 1)
|
||||
for (num_gangs = 1; num_gangs <= 32; num_gangs <<= 1)
|
||||
{
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
arr_a[i] = i;
|
||||
|
||||
#pragma acc parallel copyin(arr_a) copyout(arr_b) \
|
||||
num_gangs(num_gangs) num_workers(num_workers) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
|
||||
#pragma acc loop worker
|
||||
for (k = 0; k < 32; k++)
|
||||
arr_b[j * 32 + (31 - k)] = arr_a[j * 32 + k] * 2;
|
||||
|
||||
#pragma acc loop worker
|
||||
for (k = 0; k < 32; k++)
|
||||
arr_a[j * 32 + (31 - k)] = arr_b[j * 32 + k] * 2;
|
||||
|
||||
#pragma acc loop worker
|
||||
for (k = 0; k < 32; k++)
|
||||
arr_b[j * 32 + (31 - k)] = arr_a[j * 32 + k] * 2;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
assert (arr_b[i] == (i ^ 31) * 8);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Test correct synchronisation between worker+vector-partitioned loops. */
|
||||
|
||||
void t18()
|
||||
{
|
||||
int arr_a[32 * 32 * 32], arr_b[32 * 32 * 32], i;
|
||||
int num_workers, num_gangs;
|
||||
|
||||
for (num_workers = 1; num_workers <= 32; num_workers <<= 1)
|
||||
for (num_gangs = 1; num_gangs <= 32; num_gangs <<= 1)
|
||||
{
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
arr_a[i] = i;
|
||||
|
||||
#pragma acc parallel copyin(arr_a) copyout(arr_b) \
|
||||
num_gangs(num_gangs) num_workers(num_workers) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
|
||||
#pragma acc loop worker vector
|
||||
for (k = 0; k < 32 * 32; k++)
|
||||
arr_b[j * 32 * 32 + (1023 - k)] = arr_a[j * 32 * 32 + k] * 2;
|
||||
|
||||
#pragma acc loop worker vector
|
||||
for (k = 0; k < 32 * 32; k++)
|
||||
arr_a[j * 32 * 32 + (1023 - k)] = arr_b[j * 32 * 32 + k] * 2;
|
||||
|
||||
#pragma acc loop worker vector
|
||||
for (k = 0; k < 32 * 32; k++)
|
||||
arr_b[j * 32 * 32 + (1023 - k)] = arr_a[j * 32 * 32 + k] * 2;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
assert (arr_b[i] == (i ^ 1023) * 8);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Test correct synchronisation between vector-partitioned loops in
|
||||
worker-partitioned mode. */
|
||||
|
||||
void t19()
|
||||
{
|
||||
int n[32 * 32], arr_a[32 * 32 * 32], arr_b[32 * 32 * 32], i;
|
||||
int num_workers, num_gangs;
|
||||
|
||||
for (num_workers = 1; num_workers <= 32; num_workers <<= 1)
|
||||
for (num_gangs = 1; num_gangs <= 32; num_gangs <<= 1)
|
||||
{
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
arr_a[i] = i;
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
n[i] = 0;
|
||||
|
||||
#pragma acc parallel copy (n) copyin(arr_a) copyout(arr_b) \
|
||||
num_gangs(num_gangs) num_workers(num_workers) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
|
||||
#pragma acc loop worker
|
||||
for (k = 0; k < 32; k++)
|
||||
{
|
||||
int m;
|
||||
|
||||
n[j * 32 + k]++;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (m = 0; m < 32; m++)
|
||||
{
|
||||
if (((j * 1024 + k * 32 + m) % 2) == 0)
|
||||
arr_b[j * 1024 + k * 32 + (31 - m)]
|
||||
= arr_a[j * 1024 + k * 32 + m] * 2;
|
||||
else
|
||||
arr_b[j * 1024 + k * 32 + (31 - m)]
|
||||
= arr_a[j * 1024 + k * 32 + m] * 3;
|
||||
}
|
||||
|
||||
/* Test returning to vector-single mode... */
|
||||
n[j * 32 + k]++;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (m = 0; m < 32; m++)
|
||||
{
|
||||
if (((j * 1024 + k * 32 + m) % 3) == 0)
|
||||
arr_a[j * 1024 + k * 32 + (31 - m)]
|
||||
= arr_b[j * 1024 + k * 32 + m] * 5;
|
||||
else
|
||||
arr_a[j * 1024 + k * 32 + (31 - m)]
|
||||
= arr_b[j * 1024 + k * 32 + m] * 7;
|
||||
}
|
||||
|
||||
/* ...and back-to-back vector loops. */
|
||||
|
||||
#pragma acc loop vector
|
||||
for (m = 0; m < 32; m++)
|
||||
{
|
||||
if (((j * 1024 + k * 32 + m) % 2) == 0)
|
||||
arr_b[j * 1024 + k * 32 + (31 - m)]
|
||||
= arr_a[j * 1024 + k * 32 + m] * 3;
|
||||
else
|
||||
arr_b[j * 1024 + k * 32 + (31 - m)]
|
||||
= arr_a[j * 1024 + k * 32 + m] * 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
assert (n[i] == 2);
|
||||
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
{
|
||||
int m = 6 * ((i % 3) == 0 ? 5 : 7);
|
||||
assert (arr_b[i] == (i ^ 31) * m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* With -O0, variables are on the stack, not in registers. Check that worker
|
||||
state propagation handles the stack frame. */
|
||||
|
||||
void t20()
|
||||
{
|
||||
int w0 = 0;
|
||||
int w1 = 0;
|
||||
int w2 = 0;
|
||||
int w3 = 0;
|
||||
int w4 = 0;
|
||||
int w5 = 0;
|
||||
int w6 = 0;
|
||||
int w7 = 0;
|
||||
|
||||
int i;
|
||||
|
||||
#pragma acc parallel copy (w0, w1, w2, w3, w4, w5, w6, w7) \
|
||||
num_gangs (1) num_workers (8)
|
||||
{
|
||||
int internal = 100;
|
||||
|
||||
#pragma acc loop worker
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
switch (i)
|
||||
{
|
||||
case 0: w0 = internal; break;
|
||||
case 1: w1 = internal; break;
|
||||
case 2: w2 = internal; break;
|
||||
case 3: w3 = internal; break;
|
||||
case 4: w4 = internal; break;
|
||||
case 5: w5 = internal; break;
|
||||
case 6: w6 = internal; break;
|
||||
case 7: w7 = internal; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (w0 != 100
|
||||
|| w1 != 100
|
||||
|| w2 != 100
|
||||
|| w3 != 100
|
||||
|| w4 != 100
|
||||
|| w5 != 100
|
||||
|| w6 != 100
|
||||
|| w7 != 100)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
|
||||
/* Exercise worker-single/vector-single mode: a plain gang loop that bumps
   each array element once.  */

void t21()
{
  int arr[32], i;

  i = 0;
  while (i < 32)
    arr[i++] = 0;

#pragma acc parallel copy(arr) \
                     num_gangs(8) num_workers(8) vector_length(32)
  {
    int j;
#pragma acc loop gang
    for (j = 0; j < 32; j++)
      arr[j] += 1;
  }

  /* Each element must have been incremented exactly once.  */
  for (i = 31; i >= 0; i--)
    assert (arr[i] == 1);
}
|
||||
|
||||
|
||||
/* Exercise worker-single/vector-single mode, this time performing the
   increment under an atomic directive.  */

void t22()
{
  int arr[32], i;

  i = 0;
  while (i < 32)
    arr[i++] = 0;

#pragma acc parallel copy(arr) \
                     num_gangs(8) num_workers(8) vector_length(32)
  {
    int j;
#pragma acc loop gang
    for (j = 0; j < 32; j++)
      {
#pragma acc atomic
        arr[j]++;
      }
  }

  /* Each element must have been incremented exactly once.  */
  for (i = 31; i >= 0; i--)
    assert (arr[i] == 1);
}
|
||||
|
||||
|
||||
/* Test a condition in worker-single/vector-single mode.  Odd elements are
   incremented by one, even elements by two.  */

void t23()
{
  int arr[32], i;

  for (i = 0; i < 32; i++)
    arr[i] = i;

#pragma acc parallel copy(arr) \
                     num_gangs(8) num_workers(8) vector_length(32)
  {
    int j;
#pragma acc loop gang
    for (j = 0; j < 32; j++)
      if ((arr[j] % 2) != 0)
        arr[j]++;
      else
        arr[j] += 2;
  }

  /* The conditional expression must be parenthesized as a whole: '==' binds
     tighter than '?:', so without the parentheses the assertion tests
     'i + 1' or 'i + 2' (always non-zero here) instead of comparing arr[i]
     against the expected value.  */
  for (i = 0; i < 32; i++)
    assert (arr[i] == (((i % 2) != 0) ? i + 1 : i + 2));
}
|
||||
|
||||
|
||||
/* Exercise a switch statement in worker-single/vector-single mode.  Each
   element grows by (value % 5) + 1.  */

void t24()
{
  int arr[32], i;

  i = 0;
  while (i < 32)
    {
      arr[i] = i;
      i++;
    }

#pragma acc parallel copy(arr) \
                     num_gangs(8) num_workers(8) vector_length(32)
  {
    int j;
#pragma acc loop gang
    for (j = 0; j < 32; j++)
      switch (arr[j] % 5)
        {
        case 0:
          arr[j] += 1;
          break;
        case 1:
          arr[j] += 2;
          break;
        case 2:
          arr[j] += 3;
          break;
        case 3:
          arr[j] += 4;
          break;
        case 4:
          arr[j] += 5;
          break;
        default:
          /* Not reachable for the non-negative inputs used here.  */
          arr[j] += 99;
        }
  }

  for (i = 31; i >= 0; i--)
    {
      int expected = i + (i % 5) + 1;
      assert (arr[i] == expected);
    }
}
|
||||
|
||||
|
||||
/* Exercise worker-single/vector-partitioned mode: a vector loop nested
   directly inside a gang loop, incrementing atomically.  */

void t25()
{
  int arr[32 * 32], i;

  i = 0;
  while (i < 32 * 32)
    {
      arr[i] = i;
      i++;
    }

#pragma acc parallel copy(arr) \
                     num_gangs(8) num_workers(8) vector_length(32)
  {
    int j;
#pragma acc loop gang
    for (j = 0; j < 32; j++)
      {
        int k;
#pragma acc loop vector
        for (k = 0; k < 32; k++)
          {
#pragma acc atomic
            arr[j * 32 + k]++;
          }
      }
  }

  /* Every element must be exactly one larger than its initial value.  */
  for (i = 32 * 32 - 1; i >= 0; i--)
    assert (arr[i] == i + 1);
}
|
||||
|
||||
|
||||
/* Test worker-single, vector-partitioned, gang-redundant mode. */
|
||||
|
||||
#define ACTUAL_GANGS 8
|
||||
void t27()
|
||||
{
|
||||
int n, arr[32], i;
|
||||
int ondev;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
arr[i] = 0;
|
||||
|
||||
n = 0;
|
||||
|
||||
#pragma acc parallel copy(n, arr) copyout(ondev) \
|
||||
num_gangs(ACTUAL_GANGS) num_workers(8) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
|
||||
ondev = acc_on_device (acc_device_not_host);
|
||||
|
||||
#pragma acc atomic
|
||||
n++;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
#pragma acc atomic
|
||||
arr[j] += 1;
|
||||
}
|
||||
|
||||
#pragma acc atomic
|
||||
n++;
|
||||
}
|
||||
|
||||
int m = ondev ? ACTUAL_GANGS : 1;
|
||||
|
||||
assert (n == m * 2);
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (arr[i] == m);
|
||||
}
|
||||
#undef ACTUAL_GANGS
|
||||
|
||||
|
||||
/* Check whether worker-single variables get broadcast to vectors.  */

/* Helper routine: returns a fixed constant.  */
#pragma acc routine
float t28_routine ()
{
  float result = 2.71;
  return result;
}
|
||||
|
||||
#define N 32
/* A value obtained from a routine call before the vector loop is combined
   with a copied-in scalar inside the loop; the host then recomputes each
   element and compares within a small tolerance.  */
void t28()
{
  float threads[N], v1 = 3.14;

  {
    int i = 0;
    while (i < N)
      threads[i++] = -1;
  }

#pragma acc parallel num_gangs (1) vector_length (32) copy (v1)
  {
    /* Assigned before the vector loop, read by every iteration of it.  */
    float val = t28_routine ();

#pragma acc loop vector
    for (int i = 0; i < N; i++)
      threads[i] = val + v1*i;
  }

  for (int i = N - 1; i >= 0; i--)
    assert (fabs (threads[i] - (t28_routine () + v1*i)) < 0.0001);
}
#undef N
|
||||
|
||||
|
||||
/* Test driver: runs every test function of this file in ascending order.
   The tests visible in this file report failure through assert or
   __builtin_abort, so returning 0 means none of those fired.  */
int main()
{
  t1();  t2();  t4();  t5();
  t7();  t8();  t9();  t10();
  t11(); t12(); t13(); t16();
  t17(); t18(); t19(); t20();
  t21(); t22(); t23(); t24();
  t25(); t27(); t28();

  return 0;
}
|
953
libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c
Normal file
953
libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c
Normal file
@ -0,0 +1,953 @@
|
||||
#include <assert.h>
|
||||
#include <openacc.h>
|
||||
|
||||
/* Two-component integer aggregate used by the aggregate-variable tests.  */
typedef struct
{
  int x;
  int y;
} vec2;
|
||||
|
||||
/* Larger aggregate: three integer coordinates plus a 13-element
   attribute array.  */
typedef struct
{
  int x;
  int y;
  int z;
  int attr[13];
} vec3_attr;
|
||||
|
||||
|
||||
/* Test of gang-private variables declared in local scope with parallel
|
||||
directive. */
|
||||
|
||||
void local_g_1()
|
||||
{
|
||||
int i, arr[32];
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
arr[i] = 3;
|
||||
|
||||
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
|
||||
{
|
||||
int x;
|
||||
|
||||
#pragma acc loop gang(static:1)
|
||||
for (i = 0; i < 32; i++)
|
||||
x = i * 2;
|
||||
|
||||
#pragma acc loop gang(static:1)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
x = i * 2;
|
||||
arr[i] += x;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (arr[i] == 3 + i * 2);
|
||||
}
|
||||
|
||||
|
||||
/* Worker-private variables declared in a local scope and broadcast into
   vector-partitioned mode.  Variant: two worker loops back to back.  */

void local_w_1()
{
  int i, arr[32 * 32 * 32];

  for (i = 32 * 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;
            int x = i ^ (j * 3);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * x;
          }

#pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;
            int x = i | (j * 5);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * x;
          }
      }
  }

  /* Recover the gang/worker/vector indices from the flat index.  */
  for (int idx = 0; idx < 32 * 32 * 32; idx++)
    {
      int gi = idx / 1024, wj = (idx / 32) % 32, vk = idx % 32;
      assert (arr[idx] == idx + (gi ^ (wj * 3)) * vk + (gi | (wj * 5)) * vk);
    }
}
|
||||
|
||||
|
||||
/* Worker-private variables declared in a local scope and broadcast into
   vector-partitioned mode.  Variant: two successive vector loops inside a
   single worker loop, with the variable reassigned in between.  */

void local_w_2()
{
  int i, arr[32 * 32 * 32];

  for (i = 32 * 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;
            int x = i ^ (j * 3);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * x;

            x = i | (j * 5);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * x;
          }
      }
  }

  /* Recover the gang/worker/vector indices from the flat index.  */
  for (int idx = 0; idx < 32 * 32 * 32; idx++)
    {
      int gi = idx / 1024, wj = (idx / 32) % 32, vk = idx % 32;
      assert (arr[idx] == idx + (gi ^ (wj * 3)) * vk + (gi | (wj * 5)) * vk);
    }
}
|
||||
|
||||
|
||||
/* Test of worker-private variables declared in a local scope, broadcasting
|
||||
to vector-partitioned mode. Aggregate worker variable. */
|
||||
|
||||
void local_w_3()
|
||||
{
|
||||
int i, arr[32 * 32 * 32];
|
||||
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
|
||||
#pragma acc loop gang
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
#pragma acc loop worker
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
vec2 pt;
|
||||
|
||||
pt.x = i ^ j * 3;
|
||||
pt.y = i | j * 5;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt.x * k;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt.y * k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
for (int j = 0; j < 32; j++)
|
||||
for (int k = 0; k < 32; k++)
|
||||
{
|
||||
int idx = i * 1024 + j * 32 + k;
|
||||
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Test of worker-private variables declared in a local scope, broadcasting
|
||||
to vector-partitioned mode. Addressable worker variable. */
|
||||
|
||||
void local_w_4()
|
||||
{
|
||||
int i, arr[32 * 32 * 32];
|
||||
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
|
||||
#pragma acc loop gang
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
#pragma acc loop worker
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
vec2 pt, *ptp;
|
||||
|
||||
ptp = &pt;
|
||||
|
||||
pt.x = i ^ j * 3;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += ptp->x * k;
|
||||
|
||||
ptp->y = i | j * 5;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt.y * k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
for (int j = 0; j < 32; j++)
|
||||
for (int k = 0; k < 32; k++)
|
||||
{
|
||||
int idx = i * 1024 + j * 32 + k;
|
||||
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Worker-private variables declared in a local scope and broadcast into
   vector-partitioned mode.  Variant: the worker variable is an array.  */

void local_w_5()
{
  int i, arr[32 * 32 * 32];

  for (i = 32 * 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;
            int pt[2];

            pt[0] = i ^ (j * 3);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * pt[0];

            pt[1] = i | (j * 5);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * pt[1];
          }
      }
  }

  /* Recover the gang/worker/vector indices from the flat index.  */
  for (int idx = 0; idx < 32 * 32 * 32; idx++)
    {
      int gi = idx / 1024, wj = (idx / 32) % 32, vk = idx % 32;
      assert (arr[idx] == idx + (gi ^ (wj * 3)) * vk + (gi | (wj * 5)) * vk);
    }
}
|
||||
|
||||
|
||||
/* Gang-private variable named in a private clause on the loop directive.  */

void loop_g_1()
{
  int x = 5, i, arr[32];

  i = 0;
  while (i < 32)
    {
      arr[i] = i;
      i++;
    }

#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
#pragma acc loop gang private(x)
    for (i = 0; i < 32; i++)
      {
        x = 2 * i;
        arr[i] += x;
      }
  }

  /* i + 2 * i == 3 * i.  */
  for (i = 31; i >= 0; i--)
    assert (arr[i] == 3 * i);
}
|
||||
|
||||
|
||||
/* Gang-private variable named on the loop directive, with its value
   broadcast to partitioned workers.  */

void loop_g_2()
{
  int x = 5, i, arr[32 * 32];

  for (i = 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
#pragma acc loop gang private(x)
    for (i = 0; i < 32; i++)
      {
        x = 2 * i;

#pragma acc loop worker
        for (int j = 0; j < 32; j++)
          arr[i * 32 + j] += x;
      }
  }

  /* Row i / 32 received an increment of twice the row number.  */
  for (i = 32 * 32 - 1; i >= 0; i--)
    {
      int row = i / 32;
      assert (arr[i] == i + row * 2);
    }
}
|
||||
|
||||
|
||||
/* Gang-private variable named on the loop directive, with its value
   broadcast to partitioned vectors.  */

void loop_g_3()
{
  int x = 5, i, arr[32 * 32];

  for (i = 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
#pragma acc loop gang private(x)
    for (i = 0; i < 32; i++)
      {
        x = 2 * i;

#pragma acc loop vector
        for (int j = 0; j < 32; j++)
          arr[i * 32 + j] += x;
      }
  }

  /* Row i / 32 received an increment of twice the row number.  */
  for (i = 32 * 32 - 1; i >= 0; i--)
    {
      int row = i / 32;
      assert (arr[i] == i + row * 2);
    }
}
|
||||
|
||||
|
||||
/* Gang-private variable named on the loop directive whose address is
   taken, with its value broadcast to partitioned workers.  */

void loop_g_4()
{
  int x = 5, i, arr[32 * 32];

  for (i = 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
#pragma acc loop gang private(x)
    for (i = 0; i < 32; i++)
      {
        int *p = &x;

        x = 2 * i;

#pragma acc loop worker
        for (int j = 0; j < 32; j++)
          arr[i * 32 + j] += x;

        /* Modify X through the pointer after the worker loop; the result
           is not read again in this iteration.  */
        *p -= 1;
      }
  }

  /* Row i / 32 received an increment of twice the row number.  */
  for (i = 32 * 32 - 1; i >= 0; i--)
    {
      int row = i / 32;
      assert (arr[i] == i + row * 2);
    }
}
|
||||
|
||||
|
||||
/* Gang-private array named on the loop directive, with its contents
   broadcast to partitioned workers.  */

void loop_g_5()
{
  int x[8], i, arr[32 * 32];

  for (i = 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
#pragma acc loop gang private(x)
    for (i = 0; i < 32; i++)
      {
        /* Fill the private array before the worker loop.  */
        for (int j = 0; j < 8; j++)
          x[j] = 2 * j;

#pragma acc loop worker
        for (int j = 0; j < 32; j++)
          arr[i * 32 + j] += x[j % 8];
      }
  }

  /* Element i was incremented by x[i % 8], i.e. twice (i % 8).  */
  for (i = 32 * 32 - 1; i >= 0; i--)
    {
      int slot = i % 8;
      assert (arr[i] == i + slot * 2);
    }
}
|
||||
|
||||
|
||||
/* Test of gang-private aggregate variable declared on loop directive, with
|
||||
broadcasting to partitioned workers. */
|
||||
|
||||
void loop_g_6()
|
||||
{
|
||||
int i, arr[32 * 32];
|
||||
vec3_attr pt;
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
|
||||
{
|
||||
#pragma acc loop gang private(pt)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
pt.x = i;
|
||||
pt.y = i * 2;
|
||||
pt.z = i * 4;
|
||||
pt.attr[5] = i * 6;
|
||||
|
||||
#pragma acc loop worker
|
||||
for (int j = 0; j < 32; j++)
|
||||
arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
assert (arr[i] == i + (i / 32) * 13);
|
||||
}
|
||||
|
||||
|
||||
/* Vector-private variable named in a private clause on the loop
   directive, used by two successive vector loops.  */

void loop_v_1()
{
  int x, i, arr[32 * 32 * 32];

  for (i = 32 * 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;

#pragma acc loop vector private(x)
            for (k = 0; k < 32; k++)
              {
                x = i ^ (j * 3);
                arr[(i * 32 + j) * 32 + k] += k * x;
              }

#pragma acc loop vector private(x)
            for (k = 0; k < 32; k++)
              {
                x = i | (j * 5);
                arr[(i * 32 + j) * 32 + k] += k * x;
              }
          }
      }
  }

  /* Recover the gang/worker/vector indices from the flat index.  */
  for (int idx = 0; idx < 32 * 32 * 32; idx++)
    {
      int gi = idx / 1024, wj = (idx / 32) % 32, vk = idx % 32;
      assert (arr[idx] == idx + (gi ^ (wj * 3)) * vk + (gi | (wj * 5)) * vk);
    }
}
|
||||
|
||||
|
||||
/* Vector-private variable named in a private clause on the loop
   directive.  Variant: the variable is an array.  */

void loop_v_2()
{
  int pt[2], i, arr[32 * 32 * 32];

  for (i = 32 * 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;

#pragma acc loop vector private(pt)
            for (k = 0; k < 32; k++)
              {
                pt[0] = i ^ (j * 3);
                pt[1] = i | (j * 5);
                arr[(i * 32 + j) * 32 + k] += k * pt[0];
                arr[(i * 32 + j) * 32 + k] += k * pt[1];
              }
          }
      }
  }

  /* Recover the gang/worker/vector indices from the flat index.  */
  for (int idx = 0; idx < 32 * 32 * 32; idx++)
    {
      int gi = idx / 1024, wj = (idx / 32) % 32, vk = idx % 32;
      assert (arr[idx] == idx + (gi ^ (wj * 3)) * vk + (gi | (wj * 5)) * vk);
    }
}
|
||||
|
||||
|
||||
/* Worker-private variable named in a private clause on the loop
   directive.  */

void loop_w_1()
{
  int x = 5, i, arr[32 * 32];

  for (i = 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
    int j;

#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            x = i ^ (j * 3);
            /* Try to ensure that accesses to 'x' don't get optimized into
               a temporary.  */
            __asm__ __volatile__ ("");
            arr[i * 32 + j] += x;
          }
      }
  }

  /* Recover the gang and worker indices from the flat index.  */
  for (i = 32 * 32 - 1; i >= 0; i--)
    {
      int gi = i / 32, wj = i % 32;
      assert (arr[i] == i + (gi ^ (wj * 3)));
    }
}
|
||||
|
||||
|
||||
/* Worker-private variable named on the loop directive and broadcast into
   vector-partitioned mode.  */

void loop_w_2()
{
  int x = 5, i, arr[32 * 32 * 32];

  for (i = 32 * 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            x = i ^ (j * 3);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * x;
          }
      }
  }

  /* Recover the gang/worker/vector indices from the flat index.  */
  for (int idx = 0; idx < 32 * 32 * 32; idx++)
    {
      int gi = idx / 1024, wj = (idx / 32) % 32, vk = idx % 32;
      assert (arr[idx] == idx + (gi ^ (wj * 3)) * vk);
    }
}
|
||||
|
||||
|
||||
/* Worker-private variable named on the loop directive and broadcast into
   vector-partitioned mode.  Variant: two worker loops back to back.  */

void loop_w_3()
{
  int x = 5, i, arr[32 * 32 * 32];

  for (i = 32 * 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            x = i ^ (j * 3);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * x;
          }

#pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            x = i | (j * 5);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * x;
          }
      }
  }

  /* Recover the gang/worker/vector indices from the flat index.  */
  for (int idx = 0; idx < 32 * 32 * 32; idx++)
    {
      int gi = idx / 1024, wj = (idx / 32) % 32, vk = idx % 32;
      assert (arr[idx] == idx + (gi ^ (wj * 3)) * vk + (gi | (wj * 5)) * vk);
    }
}
|
||||
|
||||
|
||||
/* Worker-private variable named on the loop directive and broadcast into
   vector-partitioned mode.  Variant: two successive vector loops, with
   the variable reassigned in between.  */

void loop_w_4()
{
  int x = 5, i, arr[32 * 32 * 32];

  for (i = 32 * 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            x = i ^ (j * 3);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * x;

            x = i | (j * 5);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * x;
          }
      }
  }

  /* Recover the gang/worker/vector indices from the flat index.  */
  for (int idx = 0; idx < 32 * 32 * 32; idx++)
    {
      int gi = idx / 1024, wj = (idx / 32) % 32, vk = idx % 32;
      assert (arr[idx] == idx + (gi ^ (wj * 3)) * vk + (gi | (wj * 5)) * vk);
    }
}
|
||||
|
||||
|
||||
/* Worker-private variable named on the loop directive and broadcast into
   vector-partitioned mode.  Variant: the variable has its address taken
   and is updated through the pointer between the two vector loops.  */

void loop_w_5()
{
  int x = 5, i, arr[32 * 32 * 32];

  for (i = 32 * 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            int *p = &x;

            x = i ^ (j * 3);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * x;

            /* Second value is stored through the pointer, read via X.  */
            *p = i | (j * 5);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * x;
          }
      }
  }

  /* Recover the gang/worker/vector indices from the flat index.  */
  for (int idx = 0; idx < 32 * 32 * 32; idx++)
    {
      int gi = idx / 1024, wj = (idx / 32) % 32, vk = idx % 32;
      assert (arr[idx] == idx + (gi ^ (wj * 3)) * vk + (gi | (wj * 5)) * vk);
    }
}
|
||||
|
||||
|
||||
/* Test of worker-private variables declared on a loop directive, broadcasting
|
||||
to vector-partitioned mode. Aggregate worker variable. */
|
||||
|
||||
void loop_w_6()
|
||||
{
|
||||
int i, arr[32 * 32 * 32];
|
||||
vec2 pt;
|
||||
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
|
||||
#pragma acc loop gang
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
#pragma acc loop worker private(pt)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
|
||||
pt.x = i ^ j * 3;
|
||||
pt.y = i | j * 5;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt.x * k;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt.y * k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
for (int j = 0; j < 32; j++)
|
||||
for (int k = 0; k < 32; k++)
|
||||
{
|
||||
int idx = i * 1024 + j * 32 + k;
|
||||
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Worker-private variable named on the loop directive and broadcast into
   vector-partitioned mode.  Variant: the variable is an array.  */

void loop_w_7()
{
  int i, arr[32 * 32 * 32];
  int pt[2];

  for (i = 32 * 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

  /* Being an array variable, "pt" is treated as "present_or_copy" on the
     parallel directive.  */
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        /* Here, however, it is made private per worker.  */
#pragma acc loop worker private(pt)
        for (j = 0; j < 32; j++)
          {
            int k;

            pt[0] = i ^ (j * 3);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * pt[0];

            pt[1] = i | (j * 5);

#pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[(i * 32 + j) * 32 + k] += k * pt[1];
          }
      }
  }

  /* Recover the gang/worker/vector indices from the flat index.  */
  for (int idx = 0; idx < 32 * 32 * 32; idx++)
    {
      int gi = idx / 1024, wj = (idx / 32) % 32, vk = idx % 32;
      assert (arr[idx] == idx + (gi ^ (wj * 3)) * vk + (gi | (wj * 5)) * vk);
    }
}
|
||||
|
||||
|
||||
/* Test of gang-private variables declared on the parallel directive. */
|
||||
|
||||
void parallel_g_1()
|
||||
{
|
||||
int x = 5, i, arr[32];
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
arr[i] = 3;
|
||||
|
||||
#pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(8) vector_length(32)
|
||||
{
|
||||
#pragma acc loop gang(static:1)
|
||||
for (i = 0; i < 32; i++)
|
||||
x = i * 2;
|
||||
|
||||
#pragma acc loop gang(static:1)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
if (acc_on_device (acc_device_host))
|
||||
x = i * 2;
|
||||
arr[i] += x;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (arr[i] == 3 + i * 2);
|
||||
}
|
||||
|
||||
|
||||
/* Gang-private array named in a private clause on the parallel
   directive, read in reverse order by a worker loop.  */

void parallel_g_2()
{
  int x[32], i, arr[32 * 32];

  for (i = 32 * 32 - 1; i >= 0; i--)
    arr[i] = i;

#pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(2) vector_length(32)
  {
#pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        int j;

        /* Fill the private array before the worker loop.  */
        for (j = 0; j < 32; j++)
          x[j] = 2 * j;

#pragma acc loop worker
        for (j = 0; j < 32; j++)
          arr[i * 32 + j] += x[31 - j];
      }
  }

  /* Column i % 32 was incremented by x[31 - column].  */
  for (i = 32 * 32 - 1; i >= 0; i--)
    {
      int mirrored = 31 - (i % 32);
      assert (arr[i] == i + mirrored * 2);
    }
}
|
||||
|
||||
|
||||
int main ()
|
||||
{
|
||||
local_g_1();
|
||||
local_w_1();
|
||||
local_w_2();
|
||||
local_w_3();
|
||||
local_w_4();
|
||||
local_w_5();
|
||||
loop_g_1();
|
||||
loop_g_2();
|
||||
loop_g_3();
|
||||
loop_g_4();
|
||||
loop_g_5();
|
||||
loop_g_6();
|
||||
loop_v_1();
|
||||
loop_v_2();
|
||||
loop_w_1();
|
||||
loop_w_2();
|
||||
loop_w_3();
|
||||
loop_w_4();
|
||||
loop_w_5();
|
||||
loop_w_6();
|
||||
loop_w_7();
|
||||
parallel_g_1();
|
||||
parallel_g_2();
|
||||
|
||||
return 0;
|
||||
}
|
129
libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c
Normal file
129
libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c
Normal file
@ -0,0 +1,129 @@
|
||||
/* Tests of reduction on loop directive. */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
|
||||
/* Reduction on a loop directive, gang-partitioned, with a reduction
   variable that is not private: first a sum, then a product.  */

void g_np_1()
{
  int i, arr[1024], res = 0, hres = 0;

  /* Fill the input and accumulate the host reference sum in one pass.  */
  for (i = 0; i < 1024; i++)
    {
      arr[i] = i;
      hres += arr[i];
    }

#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                     copy(res)
  {
#pragma acc loop gang reduction(+:res)
    for (i = 0; i < 1024; i++)
      res += arr[i];
  }

  assert (res == hres);

  /* Product of the first twelve elements.  */
  res = 1;
  hres = 1;

#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                     copy(res)
  {
#pragma acc loop gang reduction(*:res)
    for (i = 0; i < 12; i++)
      res *= arr[i];
  }

  i = 0;
  while (i < 12)
    hres *= arr[i++];

  assert (res == hres);
}
|
||||
|
||||
|
||||
/* Sum reduction on a loop directive partitioned over gangs and vectors,
   with a reduction variable that is not private.  */

void gv_np_1()
{
  int i, arr[1024], res = 0, hres = 0;

  /* Fill the input and accumulate the host reference sum in one pass.  */
  for (i = 0; i < 1024; i++)
    {
      arr[i] = i;
      hres += arr[i];
    }

#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                     copy(res)
  {
#pragma acc loop gang vector reduction(+:res)
    for (i = 0; i < 1024; i++)
      res += arr[i];
  }

  assert (res == hres);
}
|
||||
|
||||
|
||||
/* Sum reduction on a loop directive partitioned over gangs and workers,
   with a reduction variable that is not private.  */

void gw_np_1()
{
  int i, arr[1024], res = 0, hres = 0;

  /* Fill the input and accumulate the host reference sum in one pass.  */
  for (i = 0; i < 1024; i++)
    {
      arr[i] = i;
      hres += arr[i];
    }

#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                     copy(res)
  {
#pragma acc loop gang worker reduction(+:res)
    for (i = 0; i < 1024; i++)
      res += arr[i];
  }

  assert (res == hres);
}
|
||||
|
||||
|
||||
/* Sum reduction on a loop directive partitioned over gangs, workers and
   vectors, with a reduction variable that is not private.  */

void gwv_np_1()
{
  int i, arr[1024], res = 0, hres = 0;

  /* Fill the input and accumulate the host reference sum in one pass.  */
  for (i = 0; i < 1024; i++)
    {
      arr[i] = i;
      hres += arr[i];
    }

#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                     copy(res)
  {
#pragma acc loop gang worker vector reduction(+:res)
    for (i = 0; i < 1024; i++)
      res += arr[i];
  }

  assert (res == hres);
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
g_np_1();
|
||||
gv_np_1();
|
||||
gw_np_1();
|
||||
gwv_np_1();
|
||||
|
||||
return 0;
|
||||
}
|
88
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-1.c
Normal file
88
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-1.c
Normal file
@ -0,0 +1,88 @@
|
||||
// { dg-additional-options "-fno-exceptions" }
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Recursive factorial, made callable from compute regions by the routine
   directive.  Returns 1 for N equal to 0 or 1, otherwise N * fact (N - 1).  */
#pragma acc routine
int fact(int n)
{
  return (n == 0 || n == 1) ? 1 : n * fact (n - 1);
}
|
||||
|
||||
/* Call the 'fact' routine from asynchronous parallel loops using every
   combination of gang/worker/vector partitioning (plus seq), wait for all
   of them, and compare each result array against the sequential one.
   Aborts on any mismatch or allocation failure; returns 0 on success.  */
int main()
{
  int *s, *g, *w, *v, *gw, *gv, *wv, *gwv, i, n = 10;

  s = (int *) malloc (sizeof (int) * n);
  g = (int *) malloc (sizeof (int) * n);
  w = (int *) malloc (sizeof (int) * n);
  v = (int *) malloc (sizeof (int) * n);
  gw = (int *) malloc (sizeof (int) * n);
  gv = (int *) malloc (sizeof (int) * n);
  wv = (int *) malloc (sizeof (int) * n);
  gwv = (int *) malloc (sizeof (int) * n);

  /* Fail cleanly instead of handing a null pointer to a copyout clause.  */
  if (!s || !g || !w || !v || !gw || !gv || !wv || !gwv)
    abort ();

#pragma acc parallel loop async copyout(s[0:n]) seq
  for (i = 0; i < n; i++)
    s[i] = fact (i);

#pragma acc parallel loop async copyout(g[0:n]) gang
  for (i = 0; i < n; i++)
    g[i] = fact (i);

#pragma acc parallel loop async copyout(w[0:n]) worker
  for (i = 0; i < n; i++)
    w[i] = fact (i);

#pragma acc parallel loop async copyout(v[0:n]) vector
  for (i = 0; i < n; i++)
    v[i] = fact (i);

#pragma acc parallel loop async copyout(gw[0:n]) gang worker
  for (i = 0; i < n; i++)
    gw[i] = fact (i);

#pragma acc parallel loop async copyout(gv[0:n]) gang vector
  for (i = 0; i < n; i++)
    gv[i] = fact (i);

#pragma acc parallel loop async copyout(wv[0:n]) worker vector
  for (i = 0; i < n; i++)
    wv[i] = fact (i);

#pragma acc parallel loop async copyout(gwv[0:n]) gang worker vector
  for (i = 0; i < n; i++)
    gwv[i] = fact (i);

  /* All regions above were launched asynchronously; wait before reading.  */
#pragma acc wait

  /* The sequential result is validated against the host, and every
     partitioned result against the sequential one.  */
  for (i = 0; i < n; i++)
    if (s[i] != fact (i))
      abort ();
  for (i = 0; i < n; i++)
    if (g[i] != s[i])
      abort ();
  for (i = 0; i < n; i++)
    if (w[i] != s[i])
      abort ();
  for (i = 0; i < n; i++)
    if (v[i] != s[i])
      abort ();
  for (i = 0; i < n; i++)
    if (gw[i] != s[i])
      abort ();
  for (i = 0; i < n; i++)
    if (gv[i] != s[i])
      abort ();
  for (i = 0; i < n; i++)
    if (wv[i] != s[i])
      abort ();
  for (i = 0; i < n; i++)
    if (gwv[i] != s[i])
      abort ();

  /* Release the result buffers.  */
  free (s);
  free (g);
  free (w);
  free (v);
  free (gw);
  free (gv);
  free (wv);
  free (gwv);

  return 0;
}
|
123
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c
Normal file
123
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c
Normal file
@ -0,0 +1,123 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define M 8
|
||||
#define N 32
|
||||
|
||||
#pragma acc routine vector
|
||||
void
|
||||
vector (int *a)
|
||||
{
|
||||
int i;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] -= a[i];
|
||||
}
|
||||
|
||||
#pragma acc routine worker
|
||||
void
|
||||
worker (int *b)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
#pragma acc loop worker
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
#pragma acc loop vector
|
||||
for (j = 0; j < M; j++)
|
||||
b[i * M + j] += b[i * M + j];
|
||||
}
|
||||
}
|
||||
|
||||
#pragma acc routine gang
|
||||
void
|
||||
gang (int *a)
|
||||
{
|
||||
int i;
|
||||
|
||||
#pragma acc loop gang worker vector
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] -= i;
|
||||
}
|
||||
|
||||
#pragma acc routine seq
|
||||
void
|
||||
seq (int *a)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] += 1;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
int a[N];
|
||||
int b[M * N];
|
||||
|
||||
i = 0;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 0;
|
||||
|
||||
#pragma acc parallel copy (a[0:N])
|
||||
{
|
||||
#pragma acc loop seq
|
||||
for (i = 0; i < N; i++)
|
||||
seq (&a[0]);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != N)
|
||||
abort ();
|
||||
}
|
||||
|
||||
#pragma acc parallel copy (a[0:N])
|
||||
{
|
||||
#pragma acc loop seq
|
||||
for (i = 0; i < N; i++)
|
||||
gang (&a[0]);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != N + (N * (-1 * i)))
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = i;
|
||||
|
||||
#pragma acc parallel copy (b[0:M*N])
|
||||
{
|
||||
worker (&b[0]);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != i)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = i;
|
||||
|
||||
#pragma acc parallel copy (a[0:N])
|
||||
{
|
||||
#pragma acc loop
|
||||
for (i = 0; i < N; i++)
|
||||
vector (&a[0]);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
76
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
Normal file
76
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
Normal file
@ -0,0 +1,76 @@
|
||||
/* This code uses nvptx inline assembly guarded with acc_on_device, which is
|
||||
not optimized away at -O0, and then confuses the target assembler.
|
||||
{ dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <openacc.h>
|
||||
|
||||
#define NUM_WORKERS 16
|
||||
#define NUM_VECTORS 32
|
||||
#define WIDTH 64
|
||||
#define HEIGHT 32
|
||||
|
||||
#define WORK_ID(I,N) \
|
||||
(acc_on_device (acc_device_nvidia) \
|
||||
? ({unsigned __r; \
|
||||
__asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (__r)); \
|
||||
__r; }) : (I % N))
|
||||
#define VEC_ID(I,N) \
|
||||
(acc_on_device (acc_device_nvidia) \
|
||||
? ({unsigned __r; \
|
||||
__asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (__r)); \
|
||||
__r; }) : (I % N))
|
||||
|
||||
/* Worker-level routine: for every element (i, j) of the H x W array at PTR,
   store the identifier produced by WORK_ID (i, nw) in the bits above bit 7
   and the one produced by VEC_ID (j, nv) in the low bits.  Rows go through
   the worker loop, columns through the nested vector loop.  */
#pragma acc routine worker
void __attribute__ ((noinline))
WorkVec (int *ptr, int w, int h, int nw, int nv)
{
#pragma acc loop worker
  for (int i = 0; i < h; i++)
    {
#pragma acc loop vector
      for (int j = 0; j < w; j++)
	{
	  int *slot = &ptr[i*w + j];

	  *slot = (WORK_ID (i, nw) << 8) | VEC_ID(j, nv);
	}
    }
}
|
||||
|
||||
int DoWorkVec (int nw)
|
||||
{
|
||||
int ary[HEIGHT][WIDTH];
|
||||
int err = 0;
|
||||
|
||||
for (int ix = 0; ix != HEIGHT; ix++)
|
||||
for (int jx = 0; jx != WIDTH; jx++)
|
||||
ary[ix][jx] = 0xdeadbeef;
|
||||
|
||||
printf ("spawning %d ...", nw); fflush (stdout);
|
||||
|
||||
#pragma acc parallel num_workers(nw) vector_length (NUM_VECTORS) copy (ary)
|
||||
{
|
||||
WorkVec ((int *)ary, WIDTH, HEIGHT, nw, NUM_VECTORS);
|
||||
}
|
||||
|
||||
for (int ix = 0; ix != HEIGHT; ix++)
|
||||
for (int jx = 0; jx != WIDTH; jx++)
|
||||
{
|
||||
int exp = ((ix % nw) << 8) | (jx % NUM_VECTORS);
|
||||
|
||||
if (ary[ix][jx] != exp)
|
||||
{
|
||||
printf ("\nary[%d][%d] = %#x expected %#x", ix, jx,
|
||||
ary[ix][jx], exp);
|
||||
err = 1;
|
||||
}
|
||||
}
|
||||
printf (err ? " failed\n" : " ok\n");
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
for (int W = 1; W <= NUM_WORKERS; W <<= 1)
|
||||
err |= DoWorkVec (W);
|
||||
|
||||
return err;
|
||||
}
|
@ -1,361 +0,0 @@
|
||||
/* Copy of update-1.c with self exchanged with host for #pragma acc update. */
|
||||
|
||||
/* { dg-do run } */
|
||||
/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
|
||||
|
||||
#include <openacc.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
int N = 8;
|
||||
int NDIV2 = N / 2;
|
||||
float *a, *b, *c;
|
||||
float *d_a, *d_b, *d_c;
|
||||
int i;
|
||||
|
||||
a = (float *) malloc (N * sizeof (float));
|
||||
b = (float *) malloc (N * sizeof (float));
|
||||
c = (float *) malloc (N * sizeof (float));
|
||||
|
||||
d_a = (float *) acc_malloc (N * sizeof (float));
|
||||
d_b = (float *) acc_malloc (N * sizeof (float));
|
||||
d_c = (float *) acc_malloc (N * sizeof (float));
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 3.0;
|
||||
b[i] = 0.0;
|
||||
}
|
||||
|
||||
acc_map_data (a, d_a, N * sizeof (float));
|
||||
acc_map_data (b, d_b, N * sizeof (float));
|
||||
acc_map_data (c, d_c, N * sizeof (float));
|
||||
|
||||
#pragma acc update device (a[0:N], b[0:N])
|
||||
|
||||
#pragma acc parallel present (a[0:N], b[0:N])
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
|
||||
#pragma acc update self (a[0:N], b[0:N])
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 3.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 3.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (!acc_is_present (&a[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
if (!acc_is_present (&b[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 5.0;
|
||||
b[i] = 1.0;
|
||||
}
|
||||
|
||||
#pragma acc update device (a[0:N], b[0:N])
|
||||
|
||||
#pragma acc parallel present (a[0:N], b[0:N])
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
|
||||
#pragma acc update self (a[0:N], b[0:N])
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 5.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 5.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (!acc_is_present (&a[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
if (!acc_is_present (&b[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 5.0;
|
||||
b[i] = 1.0;
|
||||
}
|
||||
|
||||
#pragma acc update device (a[0:N], b[0:N])
|
||||
|
||||
#pragma acc parallel present (a[0:N], b[0:N])
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
|
||||
#pragma acc update host (a[0:N], b[0:N])
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 5.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 5.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (!acc_is_present (&a[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
if (!acc_is_present (&b[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 6.0;
|
||||
b[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc update device (a[0:N], b[0:N])
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 9.0;
|
||||
}
|
||||
|
||||
#pragma acc parallel present (a[0:N], b[0:N])
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
|
||||
#pragma acc update self (a[0:N], b[0:N])
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 6.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 6.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (!acc_is_present (&a[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
if (!acc_is_present (&b[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 7.0;
|
||||
b[i] = 2.0;
|
||||
}
|
||||
|
||||
#pragma acc update device (a[0:N], b[0:N])
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 9.0;
|
||||
}
|
||||
|
||||
#pragma acc parallel present (a[0:N], b[0:N])
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
|
||||
#pragma acc update self (a[0:N], b[0:N])
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 7.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 7.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 9.0;
|
||||
}
|
||||
|
||||
#pragma acc update device (a[0:N])
|
||||
|
||||
#pragma acc parallel present (a[0:N], b[0:N])
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
|
||||
#pragma acc update self (a[0:N], b[0:N])
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 9.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 9.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (!acc_is_present (&a[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
if (!acc_is_present (&b[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 5.0;
|
||||
}
|
||||
|
||||
#pragma acc update device (a[0:N])
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 6.0;
|
||||
}
|
||||
|
||||
#pragma acc update device (a[0:NDIV2])
|
||||
|
||||
#pragma acc parallel present (a[0:N], b[0:N])
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
b[ii] = a[ii];
|
||||
}
|
||||
|
||||
#pragma acc update self (a[0:N], b[0:N])
|
||||
|
||||
for (i = 0; i < NDIV2; i++)
|
||||
{
|
||||
if (a[i] != 6.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 6.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = NDIV2; i < N; i++)
|
||||
{
|
||||
if (a[i] != 5.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 5.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (!acc_is_present (&a[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
if (!acc_is_present (&b[0], (N * sizeof (float))))
|
||||
abort ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc update device (a[0:4])
|
||||
|
||||
#pragma acc parallel present (a[0:N])
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
a[ii] = a[ii] + 1.0;
|
||||
}
|
||||
|
||||
#pragma acc update self (a[4:4])
|
||||
|
||||
for (i = 0; i < NDIV2; i++)
|
||||
{
|
||||
if (a[i] != 0.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = NDIV2; i < N; i++)
|
||||
{
|
||||
if (a[i] != 6.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
#pragma acc update self (a[0:4])
|
||||
|
||||
for (i = 0; i < NDIV2; i++)
|
||||
{
|
||||
if (a[i] != 1.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = NDIV2; i < N; i++)
|
||||
{
|
||||
if (a[i] != 6.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
a[2] = 9;
|
||||
a[3] = 9;
|
||||
a[4] = 9;
|
||||
a[5] = 9;
|
||||
|
||||
#pragma acc update device (a[2:4])
|
||||
|
||||
#pragma acc parallel present (a[0:N])
|
||||
{
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < N; ii++)
|
||||
a[ii] = a[ii] + 1.0;
|
||||
}
|
||||
|
||||
#pragma acc update self (a[2:4])
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (a[i] != 1.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 2; i < 6; i++)
|
||||
{
|
||||
if (a[i] != 10.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 6; i < N; i++)
|
||||
{
|
||||
if (a[i] != 6.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -20,7 +20,7 @@ main (void)
|
||||
|
||||
#pragma acc parallel vector_length (32) copyin (a,b) copyout (c)
|
||||
{
|
||||
#pragma acc loop /* vector clause is missing, since it's not yet supported. */
|
||||
#pragma acc loop vector
|
||||
for (unsigned int i = 0; i < n; i++)
|
||||
c[i] = a[i] + b[i];
|
||||
}
|
||||
|
@ -1,28 +0,0 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test worker-single/vector-single mode. */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int arr[32], i;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
arr[i] = 0;
|
||||
|
||||
#pragma acc parallel copy(arr) num_gangs(8) num_workers(8) vector_length(32)
|
||||
{
|
||||
int j;
|
||||
#pragma acc loop gang
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
#pragma acc atomic
|
||||
arr[j]++;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (arr[i] == 1);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,28 +0,0 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test worker-single/vector-partitioned mode. */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int arr[32], i;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc parallel copy(arr) num_gangs(1) num_workers(8) vector_length(32)
|
||||
{
|
||||
int k;
|
||||
#pragma acc loop vector
|
||||
for (k = 0; k < 32; k++)
|
||||
{
|
||||
#pragma acc atomic
|
||||
arr[k]++;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (arr[i] == i + 1);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
#include <assert.h>
|
||||
|
||||
#if defined(ACC_DEVICE_TYPE_host)
|
||||
#define ACTUAL_GANGS 1
|
||||
#else
|
||||
#define ACTUAL_GANGS 8
|
||||
#endif
|
||||
|
||||
/* Test worker-single, vector-partitioned, gang-redundant mode. */
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int n, arr[32], i;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
arr[i] = 0;
|
||||
|
||||
n = 0;
|
||||
|
||||
#pragma acc parallel copy(n, arr) num_gangs(ACTUAL_GANGS) num_workers(8) \
|
||||
vector_length(32)
|
||||
{
|
||||
int j;
|
||||
|
||||
#pragma acc atomic
|
||||
n++;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
#pragma acc atomic
|
||||
arr[j] += 1;
|
||||
}
|
||||
|
||||
#pragma acc atomic
|
||||
n++;
|
||||
}
|
||||
|
||||
assert (n == ACTUAL_GANGS * 2);
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (arr[i] == ACTUAL_GANGS);
|
||||
|
||||
return 0;
|
||||
}
|
@ -132,4 +132,126 @@ program asyncwait
|
||||
if (d(i) .ne. 1.0) call abort
|
||||
if (e(i) .ne. 11.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 3.0
|
||||
b(:) = 0.0
|
||||
|
||||
!$acc data copy (a(1:N)) copy (b(1:N))
|
||||
|
||||
!$acc kernels async
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
b(i) = a(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc wait
|
||||
!$acc end data
|
||||
|
||||
do i = 1, N
|
||||
if (a(i) .ne. 3.0) call abort
|
||||
if (b(i) .ne. 3.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 2.0
|
||||
b(:) = 0.0
|
||||
|
||||
!$acc data copy (a(1:N)) copy (b(1:N))
|
||||
|
||||
!$acc kernels async (1)
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
b(i) = a(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc wait (1)
|
||||
!$acc end data
|
||||
|
||||
do i = 1, N
|
||||
if (a(i) .ne. 2.0) call abort
|
||||
if (b(i) .ne. 2.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 3.0
|
||||
b(:) = 0.0
|
||||
c(:) = 0.0
|
||||
d(:) = 0.0
|
||||
|
||||
!$acc data copy (a(1:N)) copy (b(1:N)) copy (c(1:N)) copy (d(1:N))
|
||||
|
||||
!$acc kernels async (1)
|
||||
do i = 1, N
|
||||
b(i) = (a(i) * a(i) * a(i)) / a(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels async (1)
|
||||
do i = 1, N
|
||||
c(i) = (a(i) * 4) / a(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels async (1)
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc wait (1)
|
||||
!$acc end data
|
||||
|
||||
do i = 1, N
|
||||
if (a(i) .ne. 3.0) call abort
|
||||
if (b(i) .ne. 9.0) call abort
|
||||
if (c(i) .ne. 4.0) call abort
|
||||
if (d(i) .ne. 1.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 2.0
|
||||
b(:) = 0.0
|
||||
c(:) = 0.0
|
||||
d(:) = 0.0
|
||||
e(:) = 0.0
|
||||
|
||||
!$acc data copy (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N))
|
||||
|
||||
!$acc kernels async (1)
|
||||
do i = 1, N
|
||||
b(i) = (a(i) * a(i) * a(i)) / a(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels async (1)
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
c(i) = (a(i) * 4) / a(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels async (1)
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels wait (1) async (1)
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
e(i) = a(i) + b(i) + c(i) + d(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc wait (1)
|
||||
!$acc end data
|
||||
|
||||
do i = 1, N
|
||||
if (a(i) .ne. 2.0) call abort
|
||||
if (b(i) .ne. 4.0) call abort
|
||||
if (c(i) .ne. 4.0) call abort
|
||||
if (d(i) .ne. 1.0) call abort
|
||||
if (e(i) .ne. 11.0) call abort
|
||||
end do
|
||||
end program asyncwait
|
||||
|
@ -1,6 +1,6 @@
|
||||
! { dg-do run }
|
||||
|
||||
program parallel_wait
|
||||
program asyncwait
|
||||
integer, parameter :: N = 64
|
||||
real, allocatable :: a(:), b(:), c(:)
|
||||
integer i
|
||||
@ -30,6 +30,31 @@ program parallel_wait
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
do i = 1, N
|
||||
if (c(i) .ne. 2.0) call abort
|
||||
end do
|
||||
|
||||
!$acc kernels async (0)
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
a(i) = 1
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels async (1)
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
b(i) = 1
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels wait (0, 1)
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
c(i) = a(i) + b(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
do i = 1, N
|
||||
if (c(i) .ne. 2.0) call abort
|
||||
end do
|
||||
@ -37,4 +62,4 @@ program parallel_wait
|
||||
deallocate (a)
|
||||
deallocate (b)
|
||||
deallocate (c)
|
||||
end program parallel_wait
|
||||
end program asyncwait
|
||||
|
@ -1,6 +1,6 @@
|
||||
! { dg-do run }
|
||||
|
||||
program parallel_wait
|
||||
program asyncwait
|
||||
integer, parameter :: N = 64
|
||||
real, allocatable :: a(:), b(:), c(:)
|
||||
integer i
|
||||
@ -32,6 +32,33 @@ program parallel_wait
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
do i = 1, N
|
||||
if (c(i) .ne. 2.0) call abort
|
||||
end do
|
||||
|
||||
!$acc kernels async (0)
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
a(i) = 1
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc kernels async (1)
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
b(i) = 1
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc wait (0, 1)
|
||||
|
||||
!$acc kernels
|
||||
!$acc loop
|
||||
do i = 1, N
|
||||
c(i) = a(i) + b(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
do i = 1, N
|
||||
if (c(i) .ne. 2.0) call abort
|
||||
end do
|
||||
@ -39,4 +66,4 @@ program parallel_wait
|
||||
deallocate (a)
|
||||
deallocate (b)
|
||||
deallocate (c)
|
||||
end program parallel_wait
|
||||
end program asyncwait
|
||||
|
290
libgomp/testsuite/libgomp.oacc-fortran/clauses-1.f90
Normal file
290
libgomp/testsuite/libgomp.oacc-fortran/clauses-1.f90
Normal file
@ -0,0 +1,290 @@
|
||||
! { dg-do run }
|
||||
! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
|
||||
|
||||
program main
  ! Exercise the data clauses of the parallel construct (copy, copyin,
  ! copyout, create, present, the present_or_* forms and their p*
  ! abbreviations), alone and combined with explicit acc_copyin /
  ! acc_copyout calls.  After each region the host arrays are compared
  ! with the expected values, and the arrays must no longer be present
  ! on the device.
  use openacc
  implicit none

  integer, parameter :: N = 32
  real, allocatable :: a(:), b(:), c(:)
  integer i

  i = 0

  allocate (a(N), b(N), c(N))

  ! copyin + copyout.
  a = 3.0
  b = 0.0

  !$acc parallel copyin (a(1:N)) copyout (b(1:N))
    do i = 1, N
      b(i) = a(i)
    end do
  !$acc end parallel

  if (any (b /= 3.0)) call abort

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort

  ! The same again with different values.
  a = 5.0
  b = 1.0

  !$acc parallel copyin (a(1:N)) copyout (b(1:N))
    do i = 1, N
      b(i) = a(i)
    end do
  !$acc end parallel

  if (any (b /= 5.0)) call abort

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort

  ! present_or_copyin on data that is already present: the host copy is
  ! overwritten with 9.0 after acc_copyin, yet b is expected to receive
  ! the 6.0 that was copied in earlier.
  a = 6.0
  b = 0.0

  call acc_copyin (a, sizeof (a))

  a = 9.0

  !$acc parallel present_or_copyin (a(1:N)) copyout (b(1:N))
    do i = 1, N
      b(i) = a(i)
    end do
  !$acc end parallel

  if (any (b /= 6.0)) call abort

  call acc_copyout (a, sizeof (a))

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort

  ! present_or_copyout on data that is not present.
  a = 6.0
  b = 0.0

  !$acc parallel copyin (a(1:N)) present_or_copyout (b(1:N))
    do i = 1, N
      b(i) = a(i)
    end do
  !$acc end parallel

  if (any (b /= 6.0)) call abort

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort

  ! present_or_copyout on data that is already present: the host copy of
  ! b is expected to keep its old value 2.0 when the region ends.
  a = 5.0
  b = 2.0

  call acc_copyin (b, sizeof (b))

  !$acc parallel copyin (a(1:N)) present_or_copyout (b(1:N))
    do i = 1, N
      b(i) = a(i)
    end do
  !$acc end parallel

  if (any (a /= 5.0) .or. any (b /= 2.0)) call abort

  call acc_copyout (b, sizeof (b))

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort

  ! copy + copyout.
  a = 3.0
  b = 4.0

  !$acc parallel copy (a(1:N)) copyout (b(1:N))
    do i = 1, N
      a(i) = a(i) + 1
      b(i) = a(i) + 2
    end do
  !$acc end parallel

  if (any (a /= 4.0) .or. any (b /= 6.0)) call abort

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort

  ! present_or_copy on data that is not present.
  a = 4.0
  b = 7.0

  !$acc parallel present_or_copy (a(1:N)) present_or_copy (b(1:N))
    do i = 1, N
      a(i) = a(i) + 1
      b(i) = b(i) + 2
    end do
  !$acc end parallel

  if (any (a /= 5.0) .or. any (b /= 9.0)) call abort

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort

  ! present_or_copy on data that is already present: the host copies are
  ! expected to be unchanged when the region ends.
  a = 3.0
  b = 7.0

  call acc_copyin (a, sizeof (a))
  call acc_copyin (b, sizeof (b))

  !$acc parallel present_or_copy (a(1:N)) present_or_copy (b(1:N))
    do i = 1, N
      a(i) = a(i) + 1
      b(i) = b(i) + 2
    end do
  !$acc end parallel

  if (any (a /= 3.0) .or. any (b /= 7.0)) call abort

  call acc_copyout (a, sizeof (a))
  call acc_copyout (b, sizeof (b))

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort

  ! create: c is only used as scratch space inside the region.
  a = 3.0
  b = 7.0

  !$acc parallel copyin (a(1:N)) create (c(1:N)) copyout (b(1:N))
    do i = 1, N
      c(i) = a(i)
      b(i) = c(i)
    end do
  !$acc end parallel

  if (any (a /= 3.0) .or. any (b /= 3.0)) call abort

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort
  if (acc_is_present (c)) call abort

  ! present_or_create on data that is not present.
  a = 4.0
  b = 8.0

  !$acc parallel copyin (a(1:N)) present_or_create (c(1:N)) copyout (b(1:N))
    do i = 1, N
      c(i) = a(i)
      b(i) = c(i)
    end do
  !$acc end parallel

  if (any (a /= 4.0) .or. any (b /= 4.0)) call abort

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort
  if (acc_is_present (c)) call abort

  ! present: all three arrays are mapped by hand around the region.
  a = 4.0

  call acc_copyin (a, sizeof (a))
  call acc_copyin (b, sizeof (b))
  call acc_copyin (c, sizeof (c))

  !$acc parallel present (a(1:N)) present (c(1:N)) present (b(1:N))
    do i = 1, N
      c(i) = a(i)
      b(i) = c(i)
    end do
  !$acc end parallel

  call acc_copyout (a, sizeof (a))
  call acc_copyout (b, sizeof (b))
  call acc_copyout (c, sizeof (c))

  if (any (a /= 4.0) .or. any (b /= 4.0)) call abort

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort
  if (acc_is_present (c)) call abort

  ! pcopyin, the abbreviated spelling, on data that is already present.
  a = 6.0
  b = 0.0

  call acc_copyin (a, sizeof (a))

  a = 9.0

  !$acc parallel pcopyin (a(1:N)) copyout (b(1:N))
    do i = 1, N
      b(i) = a(i)
    end do
  !$acc end parallel

  if (any (b /= 6.0)) call abort

  call acc_copyout (a, sizeof (a))

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort

  ! pcopyout on data that is not present.
  a = 6.0
  b = 0.0

  !$acc parallel copyin (a(1:N)) pcopyout (b(1:N))
    do i = 1, N
      b(i) = a(i)
    end do
  !$acc end parallel

  if (any (b /= 6.0)) call abort

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort

  ! pcreate on data that is not present.
  a = 5.0
  b = 7.0

  !$acc parallel copyin (a(1:N)) pcreate (c(1:N)) copyout (b(1:N))
    do i = 1, N
      c(i) = a(i)
      b(i) = c(i)
    end do
  !$acc end parallel

  if (any (a /= 5.0) .or. any (b /= 5.0)) call abort

  if (acc_is_present (a)) call abort
  if (acc_is_present (b)) call abort
  if (acc_is_present (c)) call abort

end program main
|
@ -1,29 +1,22 @@
|
||||
! { dg-do run { target openacc_nvidia_accel_selected } }
|
||||
|
||||
! Tests to exercise the declare directive along with
|
||||
! the clauses: copy
|
||||
! copyin
|
||||
! copyout
|
||||
! create
|
||||
! present
|
||||
! present_or_copy
|
||||
! present_or_copyin
|
||||
! present_or_copyout
|
||||
! present_or_create
|
||||
|
||||
module vars
|
||||
implicit none
|
||||
integer z
|
||||
!$acc declare create (z)
|
||||
end module vars
|
||||
|
||||
subroutine subr6 (a, d)
|
||||
implicit none
|
||||
integer, parameter :: N = 8
|
||||
integer :: i
|
||||
integer :: a(N)
|
||||
!$acc declare deviceptr (a)
|
||||
integer :: d(N)
|
||||
|
||||
i = 0
|
||||
|
||||
!$acc parallel copy (d)
|
||||
do i = 1, N
|
||||
d(i) = a(i) + a(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
end subroutine
|
||||
|
||||
subroutine subr5 (a, b, c, d)
|
||||
implicit none
|
||||
integer, parameter :: N = 8
|
||||
@ -201,15 +194,6 @@ subroutine subr0 (a, b, c, d)
|
||||
if (d(i) .ne. 13) call abort
|
||||
end do
|
||||
|
||||
call subr6 (a, d)
|
||||
|
||||
call test (a, .true.)
|
||||
call test (d, .false.)
|
||||
|
||||
do i = 1, N
|
||||
if (d(i) .ne. 16) call abort
|
||||
end do
|
||||
|
||||
end subroutine
|
||||
|
||||
program main
|
||||
@ -241,8 +225,7 @@ program main
|
||||
if (a(i) .ne. 8) call abort
|
||||
if (b(i) .ne. 8) call abort
|
||||
if (c(i) .ne. 8) call abort
|
||||
if (d(i) .ne. 16) call abort
|
||||
if (d(i) .ne. 13) call abort
|
||||
end do
|
||||
|
||||
|
||||
end program
|
||||
|
54
libgomp/testsuite/libgomp.oacc-fortran/default-1.f90
Normal file
54
libgomp/testsuite/libgomp.oacc-fortran/default-1.f90
Normal file
@ -0,0 +1,54 @@
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! Exercise default (none) on parallel and kernels constructs.  Every
  ! region stores the incoming value of a in a second variable, resets a
  ! to 1.0 and adds the stored value back, so a grows by exactly one per
  ! region: 2.0 -> 3.0 -> 4.0 -> 5.0 -> 6.0 -> 7.0.
  implicit none
  real :: a, b
  real :: c
  !$acc declare create (c)

  a = 2.0
  b = 0.0

  ! default (none) as the last clause, both variables listed explicitly.
  !$acc parallel copy (a) create (b) default (none)
    b = a
    a = 1.0
    a = a + b
  !$acc end parallel

  if (a /= 3.0) call abort

  ! The same on a kernels construct.
  !$acc kernels copy (a) create (b) default (none)
    b = a
    a = 1.0
    a = a + b
  !$acc end kernels

  if (a /= 4.0) call abort

  ! default (none) as the first clause.
  !$acc parallel default (none) copy (a) create (b)
    b = a
    a = 1.0
    a = a + b
  !$acc end parallel

  if (a /= 5.0) call abort

  ! c appears in no clause here; it is named only by the declare
  ! directive at the top of the program.
  !$acc parallel default (none) copy (a)
    c = a
    a = 1.0
    a = a + c
  !$acc end parallel

  if (a /= 6.0) call abort

  ! a is mapped by the enclosing data construct rather than by a clause
  ! on the parallel construct itself.
  !$acc data copy (a)
  !$acc parallel default (none)
    c = a
    a = 1.0
    a = a + c
  !$acc end parallel
  !$acc end data

  if (a /= 7.0) call abort

end program main
|
42
libgomp/testsuite/libgomp.oacc-fortran/firstprivate-1.f90
Normal file
42
libgomp/testsuite/libgomp.oacc-fortran/firstprivate-1.f90
Normal file
@ -0,0 +1,42 @@
|
||||
! { dg-do run }
|
||||
|
||||
program firstprivate
  ! Exercise the firstprivate clause of the parallel construct.
  integer, parameter :: Nupper=100
  integer :: a, b(Nupper), c, d, n
  include "openacc_lib.h"

  ! Use Nupper gangs on an nvidia device and a single gang anywhere else.
  n = 1
  if (acc_get_device_type () .eq. acc_device_nvidia) n = Nupper

  b(:) = -1
  a = 5

  ! Each gang-loop iteration adds its index to a and records the result.
  !$acc parallel firstprivate (a) num_gangs (n)
  !$acc loop gang
  do i = 1, n
     a = a + i
     b(i) = a
  end do
  !$acc end parallel

  ! The host a is still 5 here, so element i must hold i + 5: every
  ! iteration saw the initial value of a.
  if (any (b(1:n) /= [(i + a, i = 1, n)])) call abort ()

  !$acc data copy (a)
  ! The assignment inside this region goes to the firstprivate copy ...
  !$acc parallel firstprivate (a) copyout (c)
  a = 10
  c = a
  !$acc end parallel

  ! ... so the a mapped by the data construct must still hold 5.
  !$acc parallel copyout (d) present (a)
  d = a
  !$acc end parallel
  !$acc end data

  if (c /= 10) call abort ()
  if (d /= 5) call abort ()
end program firstprivate
|
79
libgomp/testsuite/libgomp.oacc-fortran/gang-static-1.f90
Normal file
79
libgomp/testsuite/libgomp.oacc-fortran/gang-static-1.f90
Normal file
@ -0,0 +1,79 @@
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! Runs the loop a(i) = b(i) + k under "parallel loop gang (static:...)"
  ! with several static arguments (*, 1, 2, 5, 20, the expression 0+x and
  ! the variable x), always with num_gangs (10).  After each region the
  ! external subroutine test compares a against b + k.
  implicit none
  integer, parameter :: n = 100
  integer :: i, a(n), b(n)
  integer :: x

  do i = 1, n
     b(i) = i
  end do

  !$acc parallel loop gang (static:*) num_gangs (10)
  do i = 1, n
     a(i) = b(i) + 0
  end do
  !$acc end parallel loop

  call test (a, b, 0, n)

  !$acc parallel loop gang (static:1) num_gangs (10)
  do i = 1, n
     a(i) = b(i) + 1
  end do
  !$acc end parallel loop

  call test (a, b, 1, n)

  !$acc parallel loop gang (static:2) num_gangs (10)
  do i = 1, n
     a(i) = b(i) + 2
  end do
  !$acc end parallel loop

  call test (a, b, 2, n)

  !$acc parallel loop gang (static:5) num_gangs (10)
  do i = 1, n
     a(i) = b(i) + 5
  end do
  !$acc end parallel loop

  call test (a, b, 5, n)

  !$acc parallel loop gang (static:20) num_gangs (10)
  do i = 1, n
     a(i) = b(i) + 20
  end do
  !$acc end parallel loop

  call test (a, b, 20, n)

  ! Static argument given as a non-constant expression.
  x = 5
  !$acc parallel loop gang (static:0+x) num_gangs (10)
  do i = 1, n
     a(i) = b(i) + 5
  end do
  !$acc end parallel loop

  call test (a, b, 5, n)

  ! Static argument given as a plain variable.
  x = 10
  !$acc parallel loop gang (static:x) num_gangs (10)
  do i = 1, n
     a(i) = b(i) + 10
  end do
  !$acc end parallel loop

  call test (a, b, 10, n)
end program main
|
||||
|
||||
subroutine test (a, b, sarg, n)
  ! Abort unless a(i) == b(i) + sarg for every i in 1..n.
  !
  !   a, b  arrays of n elements to compare
  !   sarg  offset expected between b and a
  !   n     number of elements
  !
  ! All arguments are read-only; callers pass literal constants for sarg.
  implicit none
  integer, intent(in) :: n
  integer, intent(in) :: a (n), b(n), sarg

  if (any (a /= b + sarg)) call abort ()
end subroutine test
|
886
libgomp/testsuite/libgomp.oacc-fortran/if-1.f90
Normal file
886
libgomp/testsuite/libgomp.oacc-fortran/if-1.f90
Normal file
@ -0,0 +1,886 @@
|
||||
! { dg-do run }
|
||||
! { dg-additional-options "-cpp" }
|
||||
|
||||
program main
|
||||
use openacc
|
||||
implicit none
|
||||
|
||||
integer, parameter :: N = 8
|
||||
integer, parameter :: one = 1
|
||||
integer, parameter :: zero = 0
|
||||
integer i, nn
|
||||
real, allocatable :: a(:), b(:)
|
||||
real exp, exp2
|
||||
|
||||
i = 0
|
||||
|
||||
allocate (a(N))
|
||||
allocate (b(N))
|
||||
|
||||
a(:) = 4.0
|
||||
|
||||
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 5.0
|
||||
#else
|
||||
exp = 4.0
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 16.0
|
||||
|
||||
!$acc parallel if (0 == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 17.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 8.0
|
||||
|
||||
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 9.0
|
||||
#else
|
||||
exp = 8.0
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 22.0
|
||||
|
||||
!$acc parallel if (zero == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 23.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 16.0
|
||||
|
||||
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 17.0;
|
||||
#else
|
||||
exp = 16.0;
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 76.0
|
||||
|
||||
!$acc parallel if (.FALSE.)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 77.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 22.0
|
||||
|
||||
nn = 1
|
||||
|
||||
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (nn == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 23.0;
|
||||
#else
|
||||
exp = 22.0;
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 18.0
|
||||
|
||||
nn = 0
|
||||
|
||||
!$acc parallel if (nn == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 19.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 49.0
|
||||
|
||||
nn = 1
|
||||
|
||||
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 50.0
|
||||
#else
|
||||
exp = 49.0
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 38.0
|
||||
|
||||
nn = 0;
|
||||
|
||||
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 39.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 91.0
|
||||
|
||||
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 92.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 43.0
|
||||
|
||||
!$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 44.0
|
||||
#else
|
||||
exp = 43.0
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 87.0
|
||||
|
||||
!$acc parallel if (one == 0)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 88.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 3.0
|
||||
b(:) = 9.0
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 0.0
|
||||
exp2 = 0.0
|
||||
#else
|
||||
call acc_copyin (a, sizeof (a))
|
||||
call acc_copyin (b, sizeof (b))
|
||||
exp = 3.0;
|
||||
exp2 = 9.0;
|
||||
#endif
|
||||
|
||||
!$acc update device (a(1:N), b(1:N)) if (1 == 1)
|
||||
|
||||
a(:) = 0.0
|
||||
b(:) = 0.0
|
||||
|
||||
!$acc update host (a(1:N), b(1:N)) if (1 == 1)
|
||||
|
||||
do i = 1, N
|
||||
if (a(i) .ne. exp) call abort
|
||||
if (b(i) .ne. exp2) call abort
|
||||
end do
|
||||
|
||||
a(:) = 6.0
|
||||
b(:) = 12.0
|
||||
|
||||
!$acc update device (a(1:N), b(1:N)) if (0 == 1)
|
||||
|
||||
a(:) = 0.0
|
||||
b(:) = 0.0
|
||||
|
||||
!$acc update host (a(1:N), b(1:N)) if (1 == 1)
|
||||
|
||||
do i = 1, N
|
||||
if (a(i) .ne. exp) call abort
|
||||
if (b(i) .ne. exp2) call abort
|
||||
end do
|
||||
|
||||
a(:) = 26.0
|
||||
b(:) = 21.0
|
||||
|
||||
!$acc update device (a(1:N), b(1:N)) if (1 == 1)
|
||||
|
||||
a(:) = 0.0
|
||||
b(:) = 0.0
|
||||
|
||||
!$acc update host (a(1:N), b(1:N)) if (0 == 1)
|
||||
|
||||
do i = 1, N
|
||||
if (a(i) .ne. 0.0) call abort
|
||||
if (b(i) .ne. 0.0) call abort
|
||||
end do
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
call acc_copyout (a, sizeof (a))
|
||||
call acc_copyout (b, sizeof (b))
|
||||
#endif
|
||||
|
||||
a(:) = 4.0
|
||||
b(:) = 0.0
|
||||
|
||||
!$acc data copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
|
||||
|
||||
!$acc parallel present (a(1:N))
|
||||
do i = 1, N
|
||||
b(i) = a(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
!$acc end data
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 4.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 8.0
|
||||
b(:) = 1.0
|
||||
|
||||
!$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (a) .eqv. .TRUE.) call abort
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc end data
|
||||
|
||||
a(:) = 18.0
|
||||
b(:) = 21.0
|
||||
|
||||
!$acc data copyin (a(1:N)) if (1 == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (a) .eqv. .FALSE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc data copyout (b(1:N)) if (0 == 1)
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
!$acc data copyout (b(1:N)) if (1 == 1)
|
||||
|
||||
!$acc parallel present (a(1:N)) present (b(1:N))
|
||||
do i = 1, N
|
||||
b(i) = a(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc end data
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
!$acc end data
|
||||
!$acc end data
|
||||
|
||||
! Check every element of b, not only the first one.
do i = 1, N
  if (b(i) .ne. 18.0) call abort
end do
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (0 == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (0 == 1)
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (1 == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .FALSE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (1 == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (zero == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (zero == 1)
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (one == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .FALSE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (one == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (one == 0)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (one == 0)
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (one == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .FALSE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (one == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
a(:) = 4.0
|
||||
|
||||
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 5.0
|
||||
#else
|
||||
exp = 4.0
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 16.0
|
||||
|
||||
!$acc kernels if (0 == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 17.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 8.0
|
||||
|
||||
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 9.0
|
||||
#else
|
||||
exp = 8.0
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 22.0
|
||||
|
||||
!$acc kernels if (zero == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 23.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 16.0
|
||||
|
||||
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 17.0;
|
||||
#else
|
||||
exp = 16.0;
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 76.0
|
||||
|
||||
!$acc kernels if (.FALSE.)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 77.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 22.0
|
||||
|
||||
nn = 1
|
||||
|
||||
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (nn == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 23.0;
|
||||
#else
|
||||
exp = 22.0;
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 18.0
|
||||
|
||||
nn = 0
|
||||
|
||||
!$acc kernels if (nn == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 19.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 49.0
|
||||
|
||||
nn = 1
|
||||
|
||||
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 50.0
|
||||
#else
|
||||
exp = 49.0
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 38.0
|
||||
|
||||
nn = 0;
|
||||
|
||||
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 39.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 91.0
|
||||
|
||||
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 92.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 43.0
|
||||
|
||||
!$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 44.0
|
||||
#else
|
||||
exp = 43.0
|
||||
#endif
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. exp) call abort
|
||||
end do
|
||||
|
||||
a(:) = 87.0
|
||||
|
||||
!$acc kernels if (one == 0)
|
||||
do i = 1, N
|
||||
if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
|
||||
b(i) = a(i) + 1
|
||||
else
|
||||
b(i) = a(i)
|
||||
end if
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 88.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 3.0
|
||||
b(:) = 9.0
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
exp = 0.0
|
||||
exp2 = 0.0
|
||||
#else
|
||||
call acc_copyin (a, sizeof (a))
|
||||
call acc_copyin (b, sizeof (b))
|
||||
exp = 3.0;
|
||||
exp2 = 9.0;
|
||||
#endif
|
||||
|
||||
!$acc update device (a(1:N), b(1:N)) if (1 == 1)
|
||||
|
||||
a(:) = 0.0
|
||||
b(:) = 0.0
|
||||
|
||||
!$acc update host (a(1:N), b(1:N)) if (1 == 1)
|
||||
|
||||
do i = 1, N
|
||||
if (a(i) .ne. exp) call abort
|
||||
if (b(i) .ne. exp2) call abort
|
||||
end do
|
||||
|
||||
a(:) = 6.0
|
||||
b(:) = 12.0
|
||||
|
||||
!$acc update device (a(1:N), b(1:N)) if (0 == 1)
|
||||
|
||||
a(:) = 0.0
|
||||
b(:) = 0.0
|
||||
|
||||
!$acc update host (a(1:N), b(1:N)) if (1 == 1)
|
||||
|
||||
do i = 1, N
|
||||
if (a(i) .ne. exp) call abort
|
||||
if (b(i) .ne. exp2) call abort
|
||||
end do
|
||||
|
||||
a(:) = 26.0
|
||||
b(:) = 21.0
|
||||
|
||||
!$acc update device (a(1:N), b(1:N)) if (1 == 1)
|
||||
|
||||
a(:) = 0.0
|
||||
b(:) = 0.0
|
||||
|
||||
!$acc update host (a(1:N), b(1:N)) if (0 == 1)
|
||||
|
||||
do i = 1, N
|
||||
if (a(i) .ne. 0.0) call abort
|
||||
if (b(i) .ne. 0.0) call abort
|
||||
end do
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
call acc_copyout (a, sizeof (a))
|
||||
call acc_copyout (b, sizeof (b))
|
||||
#endif
|
||||
|
||||
a(:) = 4.0
|
||||
b(:) = 0.0
|
||||
|
||||
!$acc data copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
|
||||
|
||||
!$acc kernels present (a(1:N))
|
||||
do i = 1, N
|
||||
b(i) = a(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
!$acc end data
|
||||
|
||||
do i = 1, N
|
||||
if (b(i) .ne. 4.0) call abort
|
||||
end do
|
||||
|
||||
a(:) = 8.0
|
||||
b(:) = 1.0
|
||||
|
||||
!$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (a) .eqv. .TRUE.) call abort
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc end data
|
||||
|
||||
a(:) = 18.0
|
||||
b(:) = 21.0
|
||||
|
||||
!$acc data copyin (a(1:N)) if (1 == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (a) .eqv. .FALSE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc data copyout (b(1:N)) if (0 == 1)
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
!$acc data copyout (b(1:N)) if (1 == 1)
|
||||
|
||||
!$acc kernels present (a(1:N)) present (b(1:N))
|
||||
do i = 1, N
|
||||
b(i) = a(i)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc end data
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
!$acc end data
|
||||
!$acc end data
|
||||
|
||||
! Check every element of b, not only the first one.
do i = 1, N
  if (b(i) .ne. 18.0) call abort
end do
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (0 == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (0 == 1)
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (1 == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .FALSE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (1 == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (zero == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (zero == 1)
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (one == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .FALSE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (one == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (one == 0)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (one == 0)
|
||||
|
||||
!$acc enter data copyin (b(1:N)) if (one == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .FALSE.) call abort
|
||||
#endif
|
||||
|
||||
!$acc exit data delete (b(1:N)) if (one == 1)
|
||||
|
||||
#if !ACC_MEM_SHARED
|
||||
if (acc_is_present (b) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
|
||||
end program main
|
@ -0,0 +1,42 @@
|
||||
! This test checks if the runtime can properly handle implicit
|
||||
! firstprivate variables inside subroutines in modules.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
module test_mod
contains
  ! Sets x to 5, copies it into y inside a first parallel loop (x carries
  ! no data clause there), resets y to -1 on the host, then copies y back
  ! into x inside a second parallel loop where y is firstprivate.
  ! On return x is expected to hold -1.
  subroutine test(x)
    implicit none

    integer :: x, y, iter

    x = 5

    !$acc parallel loop copyout (y)
    do iter = 1, 10
       y = x
    end do
    !$acc end parallel loop

    y = -1

    !$acc parallel loop firstprivate (y) copyout (x)
    do iter = 1, 10
       x = y
    end do
    !$acc end parallel loop
  end subroutine test
end module test_mod
|
||||
|
||||
program t
  ! Driver: hands a variable holding 8 to test_mod's test and aborts
  ! unless it comes back as -1.
  use test_mod, only: test

  integer :: x_min

  x_min = 8
  call test(x_min)

  if (x_min /= -1) then
     call abort
  end if
end program t
|
19
libgomp/testsuite/libgomp.oacc-fortran/pr68813.f90
Normal file
19
libgomp/testsuite/libgomp.oacc-fortran/pr68813.f90
Normal file
@ -0,0 +1,19 @@
|
||||
! Sums an n x n array of ones with a gang loop nested around a vector loop.
! Both loops carry reduction(+:sum) while sum is also firstprivate on the
! enclosing parallel construct.  No dg-do directive is given and the final
! value of sum is never checked.
! NOTE(review): presumably a reproducer for PR 68813, going by the file
! name -- confirm before restructuring the directives.
program foo
|
||||
implicit none
|
||||
integer, parameter :: n = 100
|
||||
integer, dimension(n,n) :: a
|
||||
! sum shadows the intrinsic of the same name inside this program.
integer :: i, j, sum = 0
|
||||
|
||||
a = 1
|
||||
|
||||
!$acc parallel copyin(a(1:n,1:n)) firstprivate (sum)
|
||||
!$acc loop gang reduction(+:sum)
|
||||
do i=1, n
|
||||
!$acc loop vector reduction(+:sum)
|
||||
do j=1, n
|
||||
sum = sum + a(i, j)
|
||||
enddo
|
||||
enddo
|
||||
!$acc end parallel
|
||||
|
||||
end program foo
|
544
libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90
Normal file
544
libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90
Normal file
@ -0,0 +1,544 @@
|
||||
! Miscellaneous tests for private variables.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
|
||||
! Test of gang-private variables declared on loop directive.
|
||||
|
||||
subroutine t1()
  ! Gang loop with x listed as private: every iteration stores i * 2 in x
  ! and adds it to arr(i), so arr(i) must end up as i * 3.
  integer :: x, i, arr(32)

  arr = [(i, i = 1, 32)]

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang private(x)
  do i = 1, 32
     x = i * 2
     arr(i) = arr(i) + x
  end do
  !$acc end parallel

  if (any(arr /= [(3 * i, i = 1, 32)])) call abort
end subroutine t1
|
||||
|
||||
|
||||
! Test of gang-private variables declared on loop directive, with broadcasting
|
||||
! to partitioned workers.
|
||||
|
||||
subroutine t2()
  ! x is private on the gang loop and is written once per gang iteration;
  ! the nested worker loop reads it.  Element i*32+j must end up as its
  ! initial value plus i * 2.
  implicit none
  integer :: x, i, j
  ! Indices 0 .. 32*32-1 are the only ones initialised, updated and
  ! checked, so the array is sized to exactly that range.
  integer :: arr(0:32*32-1)

  do i = 0, 32*32-1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang private(x)
  do i = 0, 31
     x = i * 2

     !$acc loop worker
     do j = 0, 31
        arr(i * 32 + j) = arr(i * 32 + j) + x
     end do
  end do
  !$acc end parallel

  do i = 0, 32 * 32 - 1
     if (arr(i) .ne. i + (i / 32) * 2) call abort
  end do
end subroutine t2
|
||||
|
||||
|
||||
! Test of gang-private variables declared on loop directive, with broadcasting
|
||||
! to partitioned vectors.
|
||||
|
||||
subroutine t3()
  ! x is private on the gang loop and is written once per gang iteration;
  ! the nested vector loop reads it.  Element i*32+j must end up as its
  ! initial value plus i * 2.
  implicit none
  integer :: x, i, j
  ! Indices 0 .. 32*32-1 are the only ones initialised, updated and
  ! checked, so the array is sized to exactly that range.
  integer :: arr(0:32*32-1)

  do i = 0, 32*32-1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang private(x)
  do i = 0, 31
     x = i * 2

     !$acc loop vector
     do j = 0, 31
        arr(i * 32 + j) = arr(i * 32 + j) + x
     end do
  end do
  !$acc end parallel

  do i = 0, 32 * 32 - 1
     if (arr(i) .ne. i + (i / 32) * 2) call abort
  end do
end subroutine t3
|
||||
|
||||
|
||||
! Test of gang-private addressable variable declared on loop directive, with
|
||||
! broadcasting to partitioned vectors.
|
||||
|
||||
subroutine t4()
  ! Same shape as the scalar gang-private tests, but the private object
  ! is a derived-type variable with an array component.  The gang loop
  ! fills pt%x, pt%y, pt%z and pt%attr(5) with i, 2i, 4i and 6i; the nested
  ! vector loop adds their sum (13 * i) to each element of row i.
  implicit none

  type vec3
     integer x, y, z, attr(13)
  end type vec3

  integer :: i, j
  ! Indices 0 .. 32*32-1 are the only ones initialised, updated and
  ! checked, so the array is sized to exactly that range.
  integer :: arr(0:32*32-1)
  type(vec3) :: pt

  do i = 0, 32*32-1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang private(pt)
  do i = 0, 31
     pt%x = i
     pt%y = i * 2
     pt%z = i * 4
     pt%attr(5) = i * 6

     !$acc loop vector
     do j = 0, 31
        arr(i * 32 + j) = arr(i * 32 + j) + pt%x + pt%y + pt%z + pt%attr(5)
     end do
  end do
  !$acc end parallel

  do i = 0, 32 * 32 - 1
     if (arr(i) .ne. i + (i / 32) * 13) call abort
  end do
end subroutine t4
|
||||
|
||||
|
||||
! Test of vector-private variables declared on loop directive.
|
||||
|
||||
subroutine t5()
  ! Two consecutive vector loops inside a gang/worker nest, each listing x
  ! as private.  The first adds ieor(i, j*3) * k, the second ior(i, j*5) * k
  ! to element i*1024 + j*32 + k.
  implicit none
  integer :: x, i, j, k, idx
  ! Indices 0 .. 32*32*32-1 are the only ones initialised, updated and
  ! checked, so the array is sized to exactly that range.
  integer :: arr(0:32*32*32-1)

  do i = 0, 32*32*32-1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang
  do i = 0, 31
     !$acc loop worker
     do j = 0, 31
        !$acc loop vector private(x)
        do k = 0, 31
           x = ieor(i, j * 3)
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
        end do
        !$acc loop vector private(x)
        do k = 0, 31
           x = ior(i, j * 5)
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
        end do
     end do
  end do
  !$acc end parallel

  do i = 0, 32 - 1
     do j = 0, 32 -1
        do k = 0, 32 - 1
           idx = i * 1024 + j * 32 + k
           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
              call abort
           end if
        end do
     end do
  end do
end subroutine t5
|
||||
|
||||
|
||||
! Test of vector-private variables declared on loop directive. Array type.
|
||||
|
||||
subroutine t6()
  ! Vector loop with a two-element array pt listed as private.  Each
  ! iteration stores ieor(i, j*3) and ior(i, j*5) in pt and adds both,
  ! scaled by k, to element i*1024 + j*32 + k.
  implicit none
  integer :: i, j, k, idx, pt(2)
  ! Indices 0 .. 32*32*32-1 are the only ones initialised, updated and
  ! checked, so the array is sized to exactly that range.
  integer :: arr(0:32*32*32-1)

  do i = 0, 32*32*32-1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang
  do i = 0, 31
     !$acc loop worker
     do j = 0, 31
        ! Only pt is privatised: the subroutine has no variable x to list.
        !$acc loop vector private(pt)
        do k = 0, 31
           pt(1) = ieor(i, j * 3)
           pt(2) = ior(i, j * 5)
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
        end do
     end do
  end do
  !$acc end parallel

  do i = 0, 32 - 1
     do j = 0, 32 -1
        do k = 0, 32 - 1
           idx = i * 1024 + j * 32 + k
           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
              call abort
           end if
        end do
     end do
  end do
end subroutine t6
|
||||
|
||||
|
||||
! Test of worker-private variables declared on a loop directive.
|
||||
|
||||
subroutine t7()
  ! x lives in blank common and is listed as private on both the gang
  ! loop and the nested worker loop.  The worker loop stores ieor(i, j*3)
  ! in x and adds it to element i*32 + j.
  implicit none
  integer :: x, i, j
  ! Indices 0 .. 32*32-1 are the only ones initialised, updated and
  ! checked, so the array is sized to exactly that range.
  integer :: arr(0:32*32-1)
  common x

  do i = 0, 32*32-1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang private(x)
  do i = 0, 31
     !$acc loop worker private(x)
     do j = 0, 31
        x = ieor(i, j * 3)
        arr(i * 32 + j) = arr(i * 32 + j) + x
     end do
  end do
  !$acc end parallel

  do i = 0, 32 * 32 - 1
     if (arr(i) .ne. i + ieor(i / 32, mod(i, 32) * 3)) call abort
  end do
end subroutine t7
|
||||
|
||||
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
|
||||
! to vector-partitioned mode.
|
||||
|
||||
subroutine t8()
  ! x is private on the worker loop, written once per worker iteration
  ! and read by the nested vector loop, which adds x * k to element
  ! i*1024 + j*32 + k.
  implicit none
  integer :: x, i, j, k, idx
  ! Indices 0 .. 32*32*32-1 are the only ones initialised, updated and
  ! checked, so the array is sized to exactly that range.
  integer :: arr(0:32*32*32-1)

  do i = 0, 32*32*32-1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang
  do i = 0, 31
     !$acc loop worker private(x)
     do j = 0, 31
        x = ieor(i, j * 3)

        !$acc loop vector
        do k = 0, 31
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
        end do
     end do
  end do
  !$acc end parallel

  do i = 0, 32 - 1
     do j = 0, 32 -1
        do k = 0, 32 - 1
           idx = i * 1024 + j * 32 + k
           if (arr(idx) .ne. idx + ieor(i, j * 3) * k) call abort
        end do
     end do
  end do
end subroutine t8
|
||||
|
||||
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
|
||||
! to vector-partitioned mode. Back-to-back worker loops.
|
||||
|
||||
subroutine t9()
  ! Two back-to-back worker loops inside one gang loop, each with x
  ! private and each feeding a nested vector loop.  The first contributes
  ! ieor(i, j*3) * k, the second ior(i, j*5) * k to element
  ! i*1024 + j*32 + k.
  implicit none
  integer :: x, i, j, k, idx
  ! Indices 0 .. 32*32*32-1 are the only ones initialised, updated and
  ! checked, so the array is sized to exactly that range.
  integer :: arr(0:32*32*32-1)

  do i = 0, 32*32*32-1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang
  do i = 0, 31
     !$acc loop worker private(x)
     do j = 0, 31
        x = ieor(i, j * 3)

        !$acc loop vector
        do k = 0, 31
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
        end do
     end do

     !$acc loop worker private(x)
     do j = 0, 31
        x = ior(i, j * 5)

        !$acc loop vector
        do k = 0, 31
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
        end do
     end do
  end do
  !$acc end parallel

  do i = 0, 32 - 1
     do j = 0, 32 -1
        do k = 0, 32 - 1
           idx = i * 1024 + j * 32 + k
           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
              call abort
           end if
        end do
     end do
  end do
end subroutine t9
|
||||
|
||||
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
|
||||
! to vector-partitioned mode. Successive vector loops.
|
||||
|
||||
subroutine t10()
  ! One worker loop with x private; x is assigned twice per worker
  ! iteration, each assignment followed by its own vector loop.  The first
  ! vector loop adds ieor(i, j*3) * k, the second ior(i, j*5) * k to
  ! element i*1024 + j*32 + k.
  implicit none
  integer :: x, i, j, k, idx
  ! Indices 0 .. 32*32*32-1 are the only ones initialised, updated and
  ! checked, so the array is sized to exactly that range.
  integer :: arr(0:32*32*32-1)

  do i = 0, 32*32*32-1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang
  do i = 0, 31
     !$acc loop worker private(x)
     do j = 0, 31
        x = ieor(i, j * 3)

        !$acc loop vector
        do k = 0, 31
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
        end do

        x = ior(i, j * 5)

        !$acc loop vector
        do k = 0, 31
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
        end do
     end do
  end do
  !$acc end parallel

  do i = 0, 32 - 1
     do j = 0, 32 -1
        do k = 0, 32 - 1
           idx = i * 1024 + j * 32 + k
           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
              call abort
           end if
        end do
     end do
  end do
end subroutine t10
|
||||
|
||||
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
|
||||
! to vector-partitioned mode. Addressable worker variable.
|
||||
|
||||
subroutine t11()
  ! Like the two-vector-loop worker test, but x has the target attribute
  ! and its second value is stored through the pointer p, which is
  ! associated with x inside the worker loop.  Both x and p are listed as
  ! private on that loop.
  implicit none
  integer :: i, j, k, idx
  ! Indices 0 .. 32*32*32-1 are the only ones initialised, updated and
  ! checked, so the array is sized to exactly that range.
  integer :: arr(0:32*32*32-1)
  integer, target :: x
  integer, pointer :: p

  do i = 0, 32*32*32-1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang
  do i = 0, 31
     !$acc loop worker private(x, p)
     do j = 0, 31
        p => x
        x = ieor(i, j * 3)

        !$acc loop vector
        do k = 0, 31
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
        end do

        ! Updates x through its alias; the next loop reads x directly.
        p = ior(i, j * 5)

        !$acc loop vector
        do k = 0, 31
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
        end do
     end do
  end do
  !$acc end parallel

  do i = 0, 32 - 1
     do j = 0, 32 -1
        do k = 0, 32 - 1
           idx = i * 1024 + j * 32 + k
           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
              call abort
           end if
        end do
     end do
  end do
end subroutine t11
|
||||
|
||||
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
|
||||
! to vector-partitioned mode. Aggregate worker variable.
|
||||
|
||||
subroutine t12()
  ! The worker-private object is a derived-type variable pt: pt%x and
  ! pt%y are set once per worker iteration and each is read by its own
  ! vector loop, contributing ieor(i, j*3) * k and ior(i, j*5) * k to
  ! element i*1024 + j*32 + k.
  implicit none

  type vec2
     integer x, y
  end type vec2

  integer :: i, j, k, idx
  ! Indices 0 .. 32*32*32-1 are the only ones initialised, updated and
  ! checked, so the array is sized to exactly that range.
  integer :: arr(0:32*32*32-1)
  type(vec2) :: pt

  do i = 0, 32*32*32-1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang
  do i = 0, 31
     !$acc loop worker private(pt)
     do j = 0, 31
        pt%x = ieor(i, j * 3)
        pt%y = ior(i, j * 5)

        !$acc loop vector
        do k = 0, 31
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%x * k
        end do

        !$acc loop vector
        do k = 0, 31
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%y * k
        end do
     end do
  end do
  !$acc end parallel

  do i = 0, 32 - 1
     do j = 0, 32 -1
        do k = 0, 32 - 1
           idx = i * 1024 + j * 32 + k
           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
              call abort
           end if
        end do
     end do
  end do
end subroutine t12
|
||||
|
||||
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
! to vector-partitioned mode.  The worker-private variable is an array.
|
||||
|
||||
subroutine t13()
  ! Worker-private array: the two elements of PT are set in each worker
  ! iteration and then read inside two separate vector loops.
  !
  ! Takes no arguments; terminates the program with a non-zero stop code if
  ! any element of the result array differs from the expected value.
  implicit none

  ! One element per (i, j, k) triple, flattened as i * 1024 + j * 32 + k.
  ! The upper bound is 32*32*32 - 1 so that every element transferred by
  ! copy(arr) has been initialised.
  integer :: i, j, k, idx, arr(0:32*32*32-1), pt(2)

  do i = 0, 32*32*32 - 1
     arr(i) = i
  end do

  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  !$acc loop gang
  do i = 0, 31
     !$acc loop worker private(pt)
     do j = 0, 31
        pt(1) = ieor(i, j * 3)
        pt(2) = ior(i, j * 5)

        ! First pass adds the contribution of pt(1).
        !$acc loop vector
        do k = 0, 31
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
        end do

        ! Second pass adds the contribution of pt(2).
        !$acc loop vector
        do k = 0, 31
           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
        end do
     end do
  end do
  !$acc end parallel

  ! Every element must have received both contributions exactly once.
  do i = 0, 32 - 1
     do j = 0, 32 - 1
        do k = 0, 32 - 1
           idx = i * 1024 + j * 32 + k
           if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
              ! Standard STOP with a non-zero code instead of the
              ! non-standard ABORT extension.
              stop 1
           end if
        end do
     end do
  end do
end subroutine t13
|
||||
|
||||
|
||||
! Test of gang-private variables declared on the parallel directive.
|
||||
|
||||
subroutine t14()
  ! Gang-private scalar declared on the parallel directive: X is written in
  ! one gang loop and read back in a second gang loop of the same parallel
  ! region.
  !
  ! Takes no arguments; terminates the program with a non-zero stop code if
  ! any element of the result array differs from 3 + 2 * i.
  use openacc, only: acc_on_device, acc_device_host
  implicit none

  integer, parameter :: n = 32
  integer :: i
  integer :: x
  integer :: arr(n)

  ! Assigned at run time rather than in the declaration, so X does not pick
  ! up the implicit SAVE attribute.
  x = 5
  arr = 3

  !$acc parallel private(x) copy(arr) num_gangs(n) num_workers(8) vector_length(32)
  !$acc loop gang(static:1)
  do i = 1, n
     x = i * 2
  end do

  ! When executing on the host, X is recomputed for the current iteration
  ! before it is used; otherwise the value stored by the first loop is read.
  !$acc loop gang(static:1)
  do i = 1, n
     if (acc_on_device(acc_device_host)) x = i * 2
     arr(i) = arr(i) + x
  end do
  !$acc end parallel

  do i = 1, n
     ! Standard STOP with a non-zero code instead of the non-standard ABORT
     ! extension.
     if (arr(i) /= 3 + i * 2) stop 1
  end do
end subroutine t14
|
||||
|
||||
|
||||
program main
  ! Driver: invokes the test subroutines t1 .. t14 one after another, in
  ! numerical order.

  ! t1 .. t10 (t1 .. t9 and the start of t10 are defined earlier in the file).
  call t1()
  call t2()
  call t3()
  call t4()
  call t5()
  call t6()
  call t7()
  call t8()
  call t9()
  call t10()

  ! Worker-private variable on a loop directive: addressable scalar,
  ! derived-type object, and array.
  call t11()
  call t12()
  call t13()

  ! Gang-private variable on the parallel directive.
  call t14()
end program main
|
Loading…
x
Reference in New Issue
Block a user