mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 13:30:58 +08:00
Update OpenACC testcases
gcc/testsuite/ * c-c++-common/goacc/deviceptr-4.c: New file. * c-c++-common/goacc/kernels-counter-var-redundant-load.c: Likewise. * c-c++-common/goacc/kernels-loop-data-2.c: Likewise. * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise. * c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise. * c-c++-common/goacc/kernels-loop-data-update.c: Likewise. * c-c++-common/goacc/kernels-loop-data.c: Likewise. * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: Likewise. * c-c++-common/goacc/parallel-reduction.c: Likewise. * c-c++-common/goacc/private-reduction-1.c: Likewise. * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: Likewise. * gfortran.dg/goacc/modules.f95: Likewise. * gfortran.dg/goacc/routine-8.f90: Likewise. * gfortran.dg/goacc/routine-level-of-parallelism-1.f90: Likewise. libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Don't force "-O2". * testsuite/libgomp.oacc-c-c++-common/data-2.c: Update. * testsuite/libgomp.oacc-c-c++-common/host_data-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: Likewise. * testsuite/libgomp.oacc-fortran/data-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/data-2.f90: Likewise. * testsuite/libgomp.oacc-c++/non-scalar-data.C: New file. * testsuite/libgomp.oacc-c-c++-common/declare-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/enter-data.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c: Likewise. 
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h: Likewise. * testsuite/libgomp.oacc-fortran/cublas-fixed.h: Likewise. 
* testsuite/libgomp.oacc-fortran/dummy-array.f90: Likewise. * testsuite/libgomp.oacc-fortran/host_data-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/host_data-3.f: Likewise. * testsuite/libgomp.oacc-fortran/host_data-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-collapse-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-collapse-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-independent.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-loop-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-map-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise. 
* testsuite/libgomp.oacc-fortran/lib-13.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-14.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-15.f90: Likewise. * testsuite/libgomp.oacc-fortran/parallel-loop-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/reference-reductions.f90: Likewise. * testsuite/libgomp.oacc-fortran/vector-routine.f90: Likewise. Co-Authored-By: James Norris <jnorris@codesourcery.com> Co-Authored-By: Julian Brown <julian@codesourcery.com> Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com> Co-Authored-By: Tom de Vries <tom@codesourcery.com> From-SVN: r261884
This commit is contained in:
parent
ebbb116851
commit
31dd69b7ff
@ -1,3 +1,26 @@
|
||||
2018-06-22 Cesar Philippidis <cesar@codesourcery.com>
|
||||
James Norris <jnorris@codesourcery.com>
|
||||
Thomas Schwinge <thomas@codesourcery.com>
|
||||
Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* c-c++-common/goacc/deviceptr-4.c: New file.
|
||||
* c-c++-common/goacc/kernels-counter-var-redundant-load.c:
|
||||
Likewise.
|
||||
* c-c++-common/goacc/kernels-loop-data-2.c: Likewise.
|
||||
* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise.
|
||||
* c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise.
|
||||
* c-c++-common/goacc/kernels-loop-data-update.c: Likewise.
|
||||
* c-c++-common/goacc/kernels-loop-data.c: Likewise.
|
||||
* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c:
|
||||
Likewise.
|
||||
* c-c++-common/goacc/parallel-reduction.c: Likewise.
|
||||
* c-c++-common/goacc/private-reduction-1.c: Likewise.
|
||||
* gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95:
|
||||
Likewise.
|
||||
* gfortran.dg/goacc/modules.f95: Likewise.
|
||||
* gfortran.dg/goacc/routine-8.f90: Likewise.
|
||||
* gfortran.dg/goacc/routine-level-of-parallelism-1.f90: Likewise.
|
||||
|
||||
2018-06-21 Michael Meissner <meissner@linux.ibm.com>
|
||||
|
||||
* gcc.target/powerpc/pack02.c: Use __ibm128 instead of long double
|
||||
|
11
gcc/testsuite/c-c++-common/goacc/deviceptr-4.c
Normal file
11
gcc/testsuite/c-c++-common/goacc/deviceptr-4.c
Normal file
@ -0,0 +1,11 @@
|
||||
/* { dg-additional-options "-fdump-tree-gimple" } */
|
||||
|
||||
void
|
||||
subr (int *a)
|
||||
{
|
||||
#pragma acc data deviceptr (a)
|
||||
#pragma acc parallel
|
||||
a[0] += 1.0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 "gimple" } } */
|
@ -0,0 +1,34 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-fdump-tree-dom3" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
COUNTERTYPE
|
||||
foo (unsigned int *c)
|
||||
{
|
||||
COUNTERTYPE ii;
|
||||
|
||||
#pragma acc kernels copyout (c[0:N])
|
||||
{
|
||||
for (ii = 0; ii < N; ii++)
|
||||
c[ii] = 1;
|
||||
}
|
||||
|
||||
return ii;
|
||||
}
|
||||
|
||||
/* We're expecting:
|
||||
|
||||
.omp_data_i_10 = &.omp_data_arr.3;
|
||||
_11 = .omp_data_i_10->ii;
|
||||
*_11 = 0;
|
||||
_15 = .omp_data_i_10->c;
|
||||
c.1_16 = *_15;
|
||||
|
||||
Check that there's only one load from anonymous ssa-name (which we assume to
|
||||
be the one to read c), and that there's no such load for ii. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)\\*_\[0-9\]\[0-9\]*;$" 1 "dom3" } } */
|
68
gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
Normal file
68
gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
Normal file
@ -0,0 +1,68 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc data copyout (a[0:N])
|
||||
{
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma acc data copyout (b[0:N])
|
||||
{
|
||||
#pragma acc kernels present (b[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma acc data copyin (a[0:N], b[0:N]) copyout (c[0:N])
|
||||
{
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
}
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only three loops are analyzed, and that all can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
|
@ -0,0 +1,66 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc enter data create (a[0:N])
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
#pragma acc exit data copyout (a[0:N])
|
||||
|
||||
#pragma acc enter data create (b[0:N])
|
||||
#pragma acc kernels present (b[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
#pragma acc exit data copyout (b[0:N])
|
||||
|
||||
|
||||
#pragma acc enter data copyin (a[0:N], b[0:N]) create (c[0:N])
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
#pragma acc exit data copyout (c[0:N])
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only three loops are analyzed, and that all can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
|
@ -0,0 +1,63 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
|
||||
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
|
||||
#pragma acc kernels present (b[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
|
||||
#pragma acc exit data copyout (a[0:N], c[0:N])
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only three loops are analyzed, and that all can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
|
63
gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
Normal file
63
gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
Normal file
@ -0,0 +1,63 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
|
||||
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
|
||||
#pragma acc update device (b[0:N])
|
||||
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
|
||||
#pragma acc exit data copyout (a[0:N], c[0:N])
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only two loops are analyzed, and that both can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
|
62
gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c
Normal file
62
gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c
Normal file
@ -0,0 +1,62 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc data copyout (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
|
||||
#pragma acc kernels present (b[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
}
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only three loops are analyzed, and that all can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
|
@ -0,0 +1,66 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
|
||||
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
|
||||
#pragma acc parallel present (b[0:N])
|
||||
{
|
||||
#pragma acc loop
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
|
||||
#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N])
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only two loops are analyzed, and that both can be
|
||||
parallelized. */
|
||||
// FIXME: OpenACC kernels stopped working with the firstprivate subarray
|
||||
// changes.
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" { xfail *-*-* } } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
|
17
gcc/testsuite/c-c++-common/goacc/parallel-reduction.c
Normal file
17
gcc/testsuite/c-c++-common/goacc/parallel-reduction.c
Normal file
@ -0,0 +1,17 @@
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int sum = 0;
|
||||
int dummy = 0;
|
||||
|
||||
#pragma acc data copy (dummy)
|
||||
{
|
||||
#pragma acc parallel num_gangs (10) copy (sum) reduction (+:sum)
|
||||
{
|
||||
int v = 5;
|
||||
sum += 10 + v;
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
12
gcc/testsuite/c-c++-common/goacc/private-reduction-1.c
Normal file
12
gcc/testsuite/c-c++-common/goacc/private-reduction-1.c
Normal file
@ -0,0 +1,12 @@
|
||||
int
|
||||
reduction ()
|
||||
{
|
||||
int i, r;
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop private (r) reduction (+:r)
|
||||
for (i = 0; i < 100; i++)
|
||||
r += 10;
|
||||
|
||||
return r;
|
||||
}
|
@ -0,0 +1,48 @@
|
||||
! { dg-additional-options "-O2" }
|
||||
! { dg-additional-options "-fdump-tree-parloops1-all" }
|
||||
! { dg-additional-options "-fdump-tree-optimized" }
|
||||
|
||||
program main
|
||||
implicit none
|
||||
integer, parameter :: n = 1024
|
||||
integer, dimension (0:n-1) :: a, b, c
|
||||
integer :: i, ii
|
||||
|
||||
!$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1))
|
||||
|
||||
!$acc kernels present (a(0:n-1))
|
||||
do i = 0, n - 1
|
||||
a(i) = i * 2
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc parallel present (b(0:n-1))
|
||||
!$acc loop
|
||||
do i = 0, n -1
|
||||
b(i) = i * 4
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
|
||||
do ii = 0, n - 1
|
||||
c(ii) = a(ii) + b(ii)
|
||||
end do
|
||||
!$acc end kernels
|
||||
|
||||
!$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1))
|
||||
|
||||
do i = 0, n - 1
|
||||
if (c(i) .ne. a(i) + b(i)) call abort
|
||||
end do
|
||||
|
||||
end program main
|
||||
|
||||
! Check that only two loops are analyzed, and that both can be parallelized.
|
||||
! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" { xfail *-*-* } } }
|
||||
! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" { xfail *-*-* } } }
|
||||
! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" { xfail *-*-* } } }
|
||||
|
||||
! Check that the loop has been split off into a function.
|
||||
! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
|
||||
! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
|
55
gcc/testsuite/gfortran.dg/goacc/modules.f95
Normal file
55
gcc/testsuite/gfortran.dg/goacc/modules.f95
Normal file
@ -0,0 +1,55 @@
|
||||
! { dg-do compile }
|
||||
|
||||
MODULE reduction_test
|
||||
|
||||
CONTAINS
|
||||
|
||||
SUBROUTINE reduction_kernel(x_min,x_max,y_min,y_max,arr,sum)
|
||||
|
||||
IMPLICIT NONE
|
||||
|
||||
INTEGER :: x_min,x_max,y_min,y_max
|
||||
REAL(KIND=8), DIMENSION(x_min-2:x_max+2,y_min-2:y_max+2) :: arr
|
||||
REAL(KIND=8) :: sum
|
||||
|
||||
INTEGER :: j,k
|
||||
|
||||
sum=0.0
|
||||
|
||||
!$ACC DATA PRESENT(arr) COPY(sum)
|
||||
!$ACC PARALLEL LOOP REDUCTION(+ : sum)
|
||||
DO k=y_min,y_max
|
||||
DO j=x_min,x_max
|
||||
sum=sum*arr(j,k)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!$ACC END PARALLEL LOOP
|
||||
!$ACC END DATA
|
||||
|
||||
END SUBROUTINE reduction_kernel
|
||||
|
||||
END MODULE reduction_test
|
||||
|
||||
program main
|
||||
use reduction_test
|
||||
|
||||
integer :: x_min,x_max,y_min,y_max
|
||||
real(kind=8), dimension(1:10,1:10) :: arr
|
||||
real(kind=8) :: sum
|
||||
|
||||
x_min = 5
|
||||
x_max = 6
|
||||
y_min = 5
|
||||
y_max = 6
|
||||
|
||||
arr(:,:) = 1.0
|
||||
|
||||
sum = 1.0
|
||||
|
||||
!$acc data copy(arr)
|
||||
|
||||
call field_summary_kernel(x_min,x_max,y_min,y_max,arr,sum)
|
||||
|
||||
!$acc end data
|
||||
|
||||
end program
|
32
gcc/testsuite/gfortran.dg/goacc/routine-8.f90
Normal file
32
gcc/testsuite/gfortran.dg/goacc/routine-8.f90
Normal file
@ -0,0 +1,32 @@
|
||||
! Test ACC ROUTINE inside an interface block.
|
||||
|
||||
program main
|
||||
interface
|
||||
function s_1 (a)
|
||||
integer a
|
||||
!$acc routine
|
||||
end function s_1
|
||||
end interface
|
||||
|
||||
interface
|
||||
function s_2 (a)
|
||||
integer a
|
||||
!$acc routine seq
|
||||
end function s_2
|
||||
end interface
|
||||
|
||||
interface
|
||||
function s_3 (a)
|
||||
integer a
|
||||
!$acc routine (s_3) ! { dg-error "Only the ..ACC ROUTINE form without list is allowed in interface block" }
|
||||
end function s_3
|
||||
end interface
|
||||
|
||||
interface
|
||||
function s_4 (a)
|
||||
integer a
|
||||
!$acc routine (s_4) seq ! { dg-error "Only the ..ACC ROUTINE form without list is allowed in interface block" }
|
||||
end function s_4
|
||||
end interface
|
||||
end program main
|
||||
|
@ -0,0 +1,72 @@
|
||||
! Test various aspects of clauses specifying compatible levels of
|
||||
! parallelism with the OpenACC routine directive. The C/C++ counterpart is
|
||||
! c-c++-common/goacc/routine-level-of-parallelism-2.c
|
||||
|
||||
subroutine g_1
|
||||
!$acc routine gang
|
||||
end subroutine g_1
|
||||
|
||||
subroutine s_1_2a
|
||||
!$acc routine
|
||||
end subroutine s_1_2a
|
||||
|
||||
subroutine s_1_2b
|
||||
!$acc routine seq
|
||||
end subroutine s_1_2b
|
||||
|
||||
subroutine s_1_2c
|
||||
!$acc routine (s_1_2c)
|
||||
end subroutine s_1_2c
|
||||
|
||||
subroutine s_1_2d
|
||||
!$acc routine (s_1_2d) seq
|
||||
end subroutine s_1_2d
|
||||
|
||||
module s_2
|
||||
contains
|
||||
subroutine s_2_1a
|
||||
!$acc routine
|
||||
end subroutine s_2_1a
|
||||
|
||||
subroutine s_2_1b
|
||||
!$acc routine seq
|
||||
end subroutine s_2_1b
|
||||
|
||||
subroutine s_2_1c
|
||||
!$acc routine (s_2_1c)
|
||||
end subroutine s_2_1c
|
||||
|
||||
subroutine s_2_1d
|
||||
!$acc routine (s_2_1d) seq
|
||||
end subroutine s_2_1d
|
||||
end module s_2
|
||||
|
||||
subroutine test
|
||||
external g_1, w_1, v_1
|
||||
external s_1_1, s_1_2
|
||||
|
||||
interface
|
||||
function s_3_1a (a)
|
||||
integer a
|
||||
!$acc routine
|
||||
end function s_3_1a
|
||||
end interface
|
||||
|
||||
interface
|
||||
function s_3_1b (a)
|
||||
integer a
|
||||
!$acc routine seq
|
||||
end function s_3_1b
|
||||
end interface
|
||||
|
||||
!$acc routine(g_1) gang
|
||||
|
||||
!$acc routine(w_1) worker
|
||||
|
||||
!$acc routine(v_1) worker
|
||||
|
||||
! Also test the implicit seq clause.
|
||||
|
||||
!$acc routine (s_1_1) seq
|
||||
|
||||
end subroutine test
|
@ -1,3 +1,128 @@
|
||||
2018-06-22 Cesar Philippidis <cesar@codesourcery.com>
|
||||
James Norris <jnorris@codesourcery.com>
|
||||
Julian Brown <julian@codesourcery.com>
|
||||
Thomas Schwinge <thomas@codesourcery.com>
|
||||
Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Don't force "-O2".
|
||||
* testsuite/libgomp.oacc-c-c++-common/data-2.c: Update.
|
||||
* testsuite/libgomp.oacc-c-c++-common/host_data-1.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/data-1.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/data-2.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-c++/non-scalar-data.C: New file.
|
||||
* testsuite/libgomp.oacc-c-c++-common/declare-3.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/enter-data.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h: Likewise.
|
||||
* testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/cublas-fixed.h: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/dummy-array.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/host_data-2.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/host_data-3.f: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/host_data-4.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction-2.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-collapse-3.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-collapse-4.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-independent.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-loop-1.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-map-1.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90:
|
||||
Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/lib-13.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/lib-14.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/lib-15.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/parallel-loop-1.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/reference-reductions.f90: Likewise.
|
||||
* testsuite/libgomp.oacc-fortran/vector-routine.f90: Likewise.
|
||||
|
||||
2018-06-20 Chung-Lin Tang <cltang@codesourcery.com>
|
||||
Thomas Schwinge <thomas@codesourcery.com>
|
||||
Cesar Philippidis <cesar@codesourcery.com>
|
||||
|
110
libgomp/testsuite/libgomp.oacc-c++/non-scalar-data.C
Normal file
110
libgomp/testsuite/libgomp.oacc-c++/non-scalar-data.C
Normal file
@ -0,0 +1,110 @@
|
||||
// Ensure that non-scalar dummy arguments which are implicitly used inside
|
||||
// offloaded regions are properly mapped using present_or_copy semantics.
|
||||
|
||||
// { dg-xfail-if "TODO" { *-*-* } }
|
||||
// { dg-excess-errors "ICE" }
|
||||
|
||||
#include <cassert>
|
||||
|
||||
const int n = 100;
|
||||
|
||||
struct data {
|
||||
int v;
|
||||
};
|
||||
|
||||
void
|
||||
kernels_present (data &d, int &x)
|
||||
{
|
||||
#pragma acc kernels present (d, x) default (none)
|
||||
{
|
||||
d.v = x;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
parallel_present (data &d, int &x)
|
||||
{
|
||||
#pragma acc parallel present (d, x) default (none)
|
||||
{
|
||||
d.v = x;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
kernels_implicit (data &d, int &x)
|
||||
{
|
||||
#pragma acc kernels
|
||||
{
|
||||
d.v = x;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
parallel_implicit (data &d, int &x)
|
||||
{
|
||||
#pragma acc parallel
|
||||
{
|
||||
d.v = x;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
reference_data (data &d, int &x)
|
||||
{
|
||||
#pragma acc data copy(d, x)
|
||||
{
|
||||
kernels_present (d, x);
|
||||
|
||||
#pragma acc update host(d)
|
||||
assert (d.v == x);
|
||||
|
||||
x = 200;
|
||||
#pragma acc update device(x)
|
||||
|
||||
parallel_present (d, x);
|
||||
}
|
||||
|
||||
assert (d.v == x);
|
||||
|
||||
x = 300;
|
||||
kernels_implicit (d, x);
|
||||
assert (d.v == x);
|
||||
|
||||
x = 400;
|
||||
parallel_implicit (d, x);
|
||||
assert (d.v == x);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
data d;
|
||||
int x = 100;
|
||||
|
||||
#pragma acc data copy(d, x)
|
||||
{
|
||||
kernels_present (d, x);
|
||||
|
||||
#pragma acc update host(d)
|
||||
assert (d.v == x);
|
||||
|
||||
x = 200;
|
||||
#pragma acc update device(x)
|
||||
|
||||
parallel_present (d, x);
|
||||
}
|
||||
|
||||
assert (d.v == x);
|
||||
|
||||
x = 300;
|
||||
kernels_implicit (d, x);
|
||||
assert (d.v == x);
|
||||
|
||||
x = 400;
|
||||
parallel_implicit (d, x);
|
||||
assert (d.v == x);
|
||||
|
||||
reference_data (d, x);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
/* Test 'acc enter/exit data' regions. */
|
||||
|
||||
/* { dg-do run } */
|
||||
/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
@ -33,6 +34,32 @@ main (int argc, char **argv)
|
||||
b[i] = a[i];
|
||||
|
||||
#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) wait async
|
||||
#pragma acc wait
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (a[i] != 3.0)
|
||||
abort ();
|
||||
|
||||
if (b[i] != 3.0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 3.0;
|
||||
b[i] = 0.0;
|
||||
}
|
||||
|
||||
#pragma acc enter data copyin (a[0:N]) async
|
||||
#pragma acc enter data copyin (b[0:N]) async wait
|
||||
#pragma acc enter data copyin (N) async wait
|
||||
#pragma acc parallel async wait
|
||||
#pragma acc loop
|
||||
for (i = 0; i < N; i++)
|
||||
b[i] = a[i];
|
||||
|
||||
#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) delete (N) wait async
|
||||
#pragma acc wait
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
|
61
libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c
Normal file
61
libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c
Normal file
@ -0,0 +1,61 @@
|
||||
/* { dg-do run { target openacc_nvidia_accel_selected } } */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <openacc.h>
|
||||
|
||||
float *b;
|
||||
#pragma acc declare deviceptr (b)
|
||||
|
||||
#pragma acc routine
|
||||
float *
|
||||
subr2 (void)
|
||||
{
|
||||
return b;
|
||||
}
|
||||
|
||||
float
|
||||
subr1 (float a)
|
||||
{
|
||||
float b;
|
||||
#pragma acc declare present_or_copy (b)
|
||||
float c;
|
||||
#pragma acc declare present_or_copyin (c)
|
||||
float d;
|
||||
#pragma acc declare present_or_create (d)
|
||||
float e;
|
||||
#pragma acc declare present_or_copyout (e)
|
||||
|
||||
#pragma acc parallel copy (a)
|
||||
{
|
||||
b = a;
|
||||
c = b;
|
||||
d = c;
|
||||
e = d;
|
||||
a = e;
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
float a;
|
||||
float *c;
|
||||
|
||||
a = 2.0;
|
||||
|
||||
a = subr1 (a);
|
||||
|
||||
if (a != 2.0)
|
||||
abort ();
|
||||
|
||||
b = (float *) acc_malloc (sizeof (float));
|
||||
|
||||
c = subr2 ();
|
||||
|
||||
if (b != c)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
23
libgomp/testsuite/libgomp.oacc-c-c++-common/enter-data.c
Normal file
23
libgomp/testsuite/libgomp.oacc-c-c++-common/enter-data.c
Normal file
@ -0,0 +1,23 @@
|
||||
/* This test verifies that the present_or_* data clauses on acc enter data
|
||||
don't cause duplicate mapping failures at runtime. */
|
||||
|
||||
/* { dg-do run } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
int a;
|
||||
|
||||
#pragma acc enter data copyin (a)
|
||||
#pragma acc enter data pcopyin (a)
|
||||
#pragma acc enter data pcreate (a)
|
||||
#pragma acc exit data delete (a)
|
||||
|
||||
#pragma acc enter data create (a)
|
||||
#pragma acc enter data pcreate (a)
|
||||
#pragma acc exit data delete (a)
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,14 +1,16 @@
|
||||
/* { dg-do run { target openacc_nvidia_accel_selected } } */
|
||||
/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
|
||||
/* { dg-additional-options "-lm -lcuda -lcublas -lcudart -Wall -Wextra" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <openacc.h>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cublas_v2.h>
|
||||
|
||||
#pragma acc routine
|
||||
void
|
||||
saxpy_host (int n, float a, float *x, float *y)
|
||||
saxpy (int n, float a, float *x, float *y)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -16,18 +18,18 @@ saxpy_host (int n, float a, float *x, float *y)
|
||||
y[i] = y[i] + a * x[i];
|
||||
}
|
||||
|
||||
#pragma acc routine
|
||||
void
|
||||
saxpy_target (int n, float a, float *x, float *y)
|
||||
validate_results (int n, float *a, float *b)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
y[i] = y[i] + a * x[i];
|
||||
if (fabs (a[i] - b[i]) > .00001)
|
||||
abort ();
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
main()
|
||||
{
|
||||
#define N 8
|
||||
int i;
|
||||
@ -42,7 +44,7 @@ main(int argc, char **argv)
|
||||
y[i] = y_ref[i] = 3.0;
|
||||
}
|
||||
|
||||
saxpy_host (N, a, x_ref, y_ref);
|
||||
saxpy (N, a, x_ref, y_ref);
|
||||
|
||||
cublasCreate (&h);
|
||||
|
||||
@ -54,11 +56,7 @@ main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (y[i] != y_ref[i])
|
||||
abort ();
|
||||
}
|
||||
validate_results (N, y, y_ref);
|
||||
|
||||
#pragma acc data create (x[0:N]) copyout (y[0:N])
|
||||
{
|
||||
@ -74,11 +72,7 @@ main(int argc, char **argv)
|
||||
|
||||
cublasDestroy (h);
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (y[i] != y_ref[i])
|
||||
abort ();
|
||||
}
|
||||
validate_results (N, y, y_ref);
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
y[i] = 3.0;
|
||||
@ -87,14 +81,24 @@ main(int argc, char **argv)
|
||||
#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
|
||||
{
|
||||
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
|
||||
saxpy_target (N, a, x, y);
|
||||
saxpy (N, a, x, y);
|
||||
}
|
||||
|
||||
validate_results (N, y, y_ref);
|
||||
|
||||
/* Exercise host_data with data transferred with acc enter data. */
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (y[i] != y_ref[i])
|
||||
abort ();
|
||||
}
|
||||
y[i] = 3.0;
|
||||
|
||||
#pragma acc enter data copyin (x, a, y)
|
||||
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
|
||||
{
|
||||
saxpy (N, a, x, y);
|
||||
}
|
||||
#pragma acc exit data delete (x, a) copyout (y)
|
||||
|
||||
validate_results (N, y, y_ref);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -0,0 +1,53 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc data copyout (a[0:N])
|
||||
{
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma acc data copyout (b[0:N])
|
||||
{
|
||||
#pragma acc kernels present (b[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma acc data copyin (a[0:N], b[0:N]) copyout (c[0:N])
|
||||
{
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
}
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,51 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc enter data create (a[0:N])
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
#pragma acc exit data copyout (a[0:N])
|
||||
|
||||
#pragma acc enter data create (b[0:N])
|
||||
#pragma acc kernels present (b[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
#pragma acc exit data copyout (b[0:N])
|
||||
|
||||
|
||||
#pragma acc enter data copyin (a[0:N], b[0:N]) create (c[0:N])
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
#pragma acc exit data copyout (c[0:N])
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,48 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
|
||||
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
|
||||
#pragma acc kernels present (b[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
|
||||
#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N])
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,50 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
|
||||
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
|
||||
#pragma acc update device (b[0:N])
|
||||
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
|
||||
#pragma acc exit data copyout (a[0:N], c[0:N])
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -0,0 +1,47 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc data copyout (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
|
||||
#pragma acc kernels present (b[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
}
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
|
||||
|
||||
#pragma acc kernels present (a[0:N])
|
||||
{
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
|
||||
#pragma acc parallel present (b[0:N])
|
||||
{
|
||||
#pragma acc loop
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
|
||||
#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
|
||||
#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N])
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,54 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared in a local scope, broadcasting
|
||||
to vector-partitioned mode. Back-to-back worker loops. */
|
||||
|
||||
int
|
||||
main (int argc, char* argv[])
|
||||
{
|
||||
int i, arr[32 * 32 * 32];
|
||||
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc kernels copy(arr)
|
||||
{
|
||||
int j;
|
||||
|
||||
#pragma acc loop gang(num:32)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
#pragma acc loop worker(num:32)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
int x = i ^ j * 3;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += x * k;
|
||||
}
|
||||
|
||||
#pragma acc loop worker(num:32)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
int x = i | j * 5;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += x * k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
for (int j = 0; j < 32; j++)
|
||||
for (int k = 0; k < 32; k++)
|
||||
{
|
||||
int idx = i * 1024 + j * 32 + k;
|
||||
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared in a local scope, broadcasting
|
||||
to vector-partitioned mode. Successive vector loops. */
|
||||
|
||||
int
|
||||
main (int argc, char* argv[])
|
||||
{
|
||||
int x = 5, i, arr[32 * 32 * 32];
|
||||
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc kernels copy(arr)
|
||||
{
|
||||
int j;
|
||||
|
||||
#pragma acc loop gang(num:32)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
#pragma acc loop worker(num:32)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
int x = i ^ j * 3;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += x * k;
|
||||
|
||||
x = i | j * 5;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += x * k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
for (int j = 0; j < 32; j++)
|
||||
for (int k = 0; k < 32; k++)
|
||||
{
|
||||
int idx = i * 1024 + j * 32 + k;
|
||||
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,55 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared in a local scope, broadcasting
|
||||
to vector-partitioned mode. Aggregate worker variable. */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int x, y;
|
||||
} vec2;
|
||||
|
||||
int
|
||||
main (int argc, char* argv[])
|
||||
{
|
||||
int i, arr[32 * 32 * 32];
|
||||
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc kernels copy(arr)
|
||||
{
|
||||
int j;
|
||||
|
||||
#pragma acc loop gang(num:32)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
#pragma acc loop worker(num:32)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
vec2 pt;
|
||||
|
||||
pt.x = i ^ j * 3;
|
||||
pt.y = i | j * 5;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt.x * k;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt.y * k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
for (int j = 0; j < 32; j++)
|
||||
for (int k = 0; k < 32; k++)
|
||||
{
|
||||
int idx = i * 1024 + j * 32 + k;
|
||||
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,58 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared in a local scope, broadcasting
|
||||
to vector-partitioned mode. Addressable worker variable. */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int x, y;
|
||||
} vec2;
|
||||
|
||||
int
|
||||
main (int argc, char* argv[])
|
||||
{
|
||||
int i, arr[32 * 32 * 32];
|
||||
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc kernels copy(arr)
|
||||
{
|
||||
int j;
|
||||
|
||||
#pragma acc loop gang(num:32)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
#pragma acc loop worker(num:32)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
vec2 pt, *ptp;
|
||||
|
||||
ptp = &pt;
|
||||
|
||||
pt.x = i ^ j * 3;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += ptp->x * k;
|
||||
|
||||
ptp->y = i | j * 5;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt.y * k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
for (int j = 0; j < 32; j++)
|
||||
for (int k = 0; k < 32; k++)
|
||||
{
|
||||
int idx = i * 1024 + j * 32 + k;
|
||||
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,51 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared in a local scope, broadcasting
|
||||
to vector-partitioned mode. Array worker variable. */
|
||||
|
||||
int
|
||||
main (int argc, char* argv[])
|
||||
{
|
||||
int i, arr[32 * 32 * 32];
|
||||
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc kernels copy(arr)
|
||||
{
|
||||
int j;
|
||||
|
||||
#pragma acc loop gang(num:32)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
#pragma acc loop worker(num:32)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
int pt[2];
|
||||
|
||||
pt[0] = i ^ j * 3;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt[0] * k;
|
||||
|
||||
pt[1] = i | j * 5;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt[1] * k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
for (int j = 0; j < 32; j++)
|
||||
for (int k = 0; k < 32; k++)
|
||||
{
|
||||
int idx = i * 1024 + j * 32 + k;
|
||||
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of gang-private variables declared on loop directive. */
|
||||
|
||||
int
|
||||
main (int argc, char* argv[])
|
||||
{
|
||||
int x = 5, i, arr[32];
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc kernels copy(arr)
|
||||
{
|
||||
#pragma acc loop gang(num:32) private(x)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
x = i * 2;
|
||||
arr[i] += x;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
assert (arr[i] == i * 3);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of gang-private variables declared on loop directive, with broadcasting
|
||||
to partitioned workers. */
|
||||
|
||||
int
|
||||
main (int argc, char* argv[])
|
||||
{
|
||||
int x = 5, i, arr[32 * 32];
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc kernels copy(arr)
|
||||
{
|
||||
#pragma acc loop gang(num:32) private(x)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
x = i * 2;
|
||||
|
||||
#pragma acc loop worker(num:32)
|
||||
for (int j = 0; j < 32; j++)
|
||||
arr[i * 32 + j] += x;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
assert (arr[i] == i + (i / 32) * 2);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of gang-private variables declared on loop directive, with broadcasting
|
||||
to partitioned vectors. */
|
||||
|
||||
int
|
||||
main (int argc, char* argv[])
|
||||
{
|
||||
int x = 5, i, arr[32 * 32];
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc kernels copy(arr)
|
||||
{
|
||||
#pragma acc loop gang(num:32) private(x)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
x = i * 2;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (int j = 0; j < 32; j++)
|
||||
arr[i * 32 + j] += x;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
assert (arr[i] == i + (i / 32) * 2);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,35 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of gang-private addressable variable declared on loop directive, with
|
||||
broadcasting to partitioned workers. */
|
||||
|
||||
int
|
||||
main (int argc, char* argv[])
|
||||
{
|
||||
int x = 5, i, arr[32 * 32];
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc kernels copy(arr)
|
||||
{
|
||||
#pragma acc loop gang(num:32) private(x)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
int *p = &x;
|
||||
|
||||
x = i * 2;
|
||||
|
||||
#pragma acc loop worker(num:32)
|
||||
for (int j = 0; j < 32; j++)
|
||||
arr[i * 32 + j] += x;
|
||||
|
||||
(*p)--;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
assert (arr[i] == i + (i / 32) * 2);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,32 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of gang-private array variable declared on loop directive, with
|
||||
broadcasting to partitioned workers. */
|
||||
|
||||
int
main (int argc, char* argv[])
{
  int arr[32 * 32];
  int x[8];
  int i;

  /* Seed every element with its own index.  */
  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

#pragma acc kernels copy(arr)
  {
#pragma acc loop gang(num:32) private(x)
    for (i = 0; i < 32; i++)
      {
        /* Fill the gang-private array before the worker loop reads it.  */
        for (int j = 0; j < 8; j++)
          x[j] = j * 2;

#pragma acc loop worker(num:32)
        for (int j = 0; j < 32; j++)
          arr[i * 32 + j] += x[j % 8];
      }
  }

  /* Column 'col' of every row received x[col % 8], i.e. (col % 8) * 2.  */
  for (int row = 0; row < 32; ++row)
    for (int col = 0; col < 32; ++col)
      {
        int idx = row * 32 + col;
        assert (arr[idx] == idx + (col % 8) * 2);
      }

  return 0;
}
|
@ -0,0 +1,40 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of gang-private aggregate variable declared on loop directive, with
|
||||
broadcasting to partitioned workers. */
|
||||
|
||||
/* Aggregate used as the gang-private variable: three scalar members plus an
   embedded array.  */
typedef struct {
  int x;
  int y;
  int z;
  int attr[13];
} vec3;
|
||||
|
||||
int
|
||||
main (int argc, char* argv[])
|
||||
{
|
||||
int i, arr[32 * 32];
|
||||
vec3 pt;
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc kernels copy(arr)
|
||||
{
|
||||
#pragma acc loop gang private(pt)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
pt.x = i;
|
||||
pt.y = i * 2;
|
||||
pt.z = i * 4;
|
||||
pt.attr[5] = i * 6;
|
||||
|
||||
#pragma acc loop worker
|
||||
for (int j = 0; j < 32; j++)
|
||||
arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32 * 32; i++)
|
||||
assert (arr[i] == i + (i / 32) * 13);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,51 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of vector-private variables declared on loop directive. */
|
||||
|
||||
int
main (int argc, char* argv[])
{
  int arr[32 * 32 * 32];
  int x;
  int i;

  /* Seed every element with its own index.  */
  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

#pragma acc kernels copy(arr)
  {
    int j;

#pragma acc loop gang(num:32)
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker(num:32)
        for (j = 0; j < 32; j++)
          {
            int k;

            /* Two consecutive vector loops, each with its own private 'x'.  */
#pragma acc loop vector(length:32) private(x)
            for (k = 0; k < 32; k++)
              {
                x = i ^ j * 3;
                arr[i * 1024 + j * 32 + k] += x * k;
              }

#pragma acc loop vector(length:32) private(x)
            for (k = 0; k < 32; k++)
              {
                x = i | j * 5;
                arr[i * 1024 + j * 32 + k] += x * k;
              }
          }
      }
  }

  /* Decompose each flat index back into (gang, worker, vector) coordinates
     and check that both contributions were added.  */
  for (int idx = 0; idx < 32 * 32 * 32; ++idx)
    {
      int g = idx / 1024;
      int w = (idx / 32) % 32;
      int v = idx % 32;

      assert (arr[idx] == idx + (g ^ w * 3) * v + (g | w * 5) * v);
    }

  return 0;
}
|
@ -0,0 +1,46 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of vector-private variables declared on loop directive. Array type. */
|
||||
|
||||
int
main (int argc, char* argv[])
{
  int arr[32 * 32 * 32];
  int pt[2];
  int i;

  /* Seed every element with its own index.  */
  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

#pragma acc kernels copy(arr)
  {
    int j;

#pragma acc loop gang(num:32)
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker(num:32)
        for (j = 0; j < 32; j++)
          {
            int k;

            /* The two-element array 'pt' is private to the vector loop.  */
#pragma acc loop vector(length:32) private(pt)
            for (k = 0; k < 32; k++)
              {
                pt[0] = i ^ j * 3;
                pt[1] = i | j * 5;
                arr[i * 1024 + j * 32 + k] += pt[0] * k;
                arr[i * 1024 + j * 32 + k] += pt[1] * k;
              }
          }
      }
  }

  /* Decompose each flat index back into (gang, worker, vector) coordinates
     and check that both contributions were added.  */
  for (int idx = 0; idx < 32 * 32 * 32; ++idx)
    {
      int g = idx / 1024;
      int w = (idx / 32) % 32;
      int v = idx % 32;

      assert (arr[idx] == idx + (g ^ w * 3) * v + (g | w * 5) * v);
    }

  return 0;
}
|
@ -0,0 +1,36 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared on a loop directive. */
|
||||
|
||||
int
main (int argc, char* argv[])
{
  int arr[32 * 32];
  int x = 5;
  int i;

  /* Seed every element with its own index.  */
  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

#pragma acc kernels copy(arr)
  {
    int j;

#pragma acc loop gang(num:32)
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker(num:32) private(x)
        for (j = 0; j < 32; j++)
          {
            x = i ^ j * 3;
            /* Try to ensure that accesses to 'x' don't get optimized into
               a temporary.  */
            __asm__ __volatile__ ("");
            arr[i * 32 + j] += x;
          }
      }
  }

  /* Element (row, col) must have gained row ^ (col * 3).  */
  for (int row = 0; row < 32; ++row)
    for (int col = 0; col < 32; ++col)
      {
        int idx = row * 32 + col;
        assert (arr[idx] == idx + (row ^ col * 3));
      }

  return 0;
}
|
@ -0,0 +1,43 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared on a loop directive, broadcasting
|
||||
to vector-partitioned mode. */
|
||||
|
||||
int
main (int argc, char* argv[])
{
  int arr[32 * 32 * 32];
  int x = 5;
  int i;

  /* Seed every element with its own index.  */
  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

#pragma acc kernels copy(arr)
  {
    int j;

#pragma acc loop gang(num:32)
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker(num:32) private(x)
        for (j = 0; j < 32; j++)
          {
            int k;

            /* Written outside the vector loop, read by every iteration
               of it.  */
            x = i ^ j * 3;

#pragma acc loop vector(length:32)
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }
      }
  }

  /* Decompose each flat index back into (gang, worker, vector)
     coordinates.  */
  for (int idx = 0; idx < 32 * 32 * 32; ++idx)
    {
      int g = idx / 1024;
      int w = (idx / 32) % 32;
      int v = idx % 32;

      assert (arr[idx] == idx + (g ^ w * 3) * v);
    }

  return 0;
}
|
@ -0,0 +1,54 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared on a loop directive, broadcasting
|
||||
to vector-partitioned mode. Back-to-back worker loops. */
|
||||
|
||||
int
main (int argc, char* argv[])
{
  int arr[32 * 32 * 32];
  int x = 5;
  int i;

  /* Seed every element with its own index.  */
  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

#pragma acc kernels copy(arr)
  {
    int j;

#pragma acc loop gang(num:32)
    for (i = 0; i < 32; i++)
      {
        /* First worker loop: contributes (i ^ j * 3) * k.  */
#pragma acc loop worker(num:32) private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            x = i ^ j * 3;

#pragma acc loop vector(length:32)
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }

        /* Second worker loop, immediately after the first: contributes
           (i | j * 5) * k.  */
#pragma acc loop worker(num:32) private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            x = i | j * 5;

#pragma acc loop vector(length:32)
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }
      }
  }

  /* Decompose each flat index back into (gang, worker, vector) coordinates
     and check that both contributions were added.  */
  for (int idx = 0; idx < 32 * 32 * 32; ++idx)
    {
      int g = idx / 1024;
      int w = (idx / 32) % 32;
      int v = idx % 32;

      assert (arr[idx] == idx + (g ^ w * 3) * v + (g | w * 5) * v);
    }

  return 0;
}
|
@ -0,0 +1,49 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared on a loop directive, broadcasting
|
||||
to vector-partitioned mode. Successive vector loops. */
|
||||
|
||||
int
main (int argc, char* argv[])
{
  int arr[32 * 32 * 32];
  int x = 5;
  int i;

  /* Seed every element with its own index.  */
  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

#pragma acc kernels copy(arr)
  {
    int j;

#pragma acc loop gang(num:32)
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker(num:32) private(x)
        for (j = 0; j < 32; j++)
          {
            int k;

            /* First value of 'x', consumed by the first vector loop.  */
            x = i ^ j * 3;

#pragma acc loop vector(length:32)
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;

            /* 'x' is overwritten between the two vector loops.  */
            x = i | j * 5;

#pragma acc loop vector(length:32)
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }
      }
  }

  /* Decompose each flat index back into (gang, worker, vector) coordinates
     and check that both contributions were added.  */
  for (int idx = 0; idx < 32 * 32 * 32; ++idx)
    {
      int g = idx / 1024;
      int w = (idx / 32) % 32;
      int v = idx % 32;

      assert (arr[idx] == idx + (g ^ w * 3) * v + (g | w * 5) * v);
    }

  return 0;
}
|
@ -0,0 +1,51 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared on a loop directive, broadcasting
|
||||
to vector-partitioned mode. Addressable worker variable. */
|
||||
|
||||
int
main (int argc, char* argv[])
{
  int arr[32 * 32 * 32];
  int x = 5;
  int i;

  /* Seed every element with its own index.  */
  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

#pragma acc kernels copy(arr)
  {
    int j;

#pragma acc loop gang(num:32)
    for (i = 0; i < 32; i++)
      {
#pragma acc loop worker(num:32) private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            /* Taking the address is what makes the private 'x'
               addressable.  */
            int *p = &x;

            x = i ^ j * 3;

#pragma acc loop vector(length:32)
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;

            /* Second value is stored through the pointer but read back
               through 'x' in the loop below.  */
            *p = i | j * 5;

#pragma acc loop vector(length:32)
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }
      }
  }

  /* Decompose each flat index back into (gang, worker, vector) coordinates
     and check that both contributions were added.  */
  for (int idx = 0; idx < 32 * 32 * 32; ++idx)
    {
      int g = idx / 1024;
      int w = (idx / 32) % 32;
      int v = idx % 32;

      assert (arr[idx] == idx + (g ^ w * 3) * v + (g | w * 5) * v);
    }

  return 0;
}
|
@ -0,0 +1,55 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared on a loop directive, broadcasting
|
||||
to vector-partitioned mode. Aggregate worker variable. */
|
||||
|
||||
/* Two-member aggregate used as the worker-private variable.  */
typedef struct
{
  int x;
  int y;
} vec2;
|
||||
|
||||
int
|
||||
main (int argc, char* argv[])
|
||||
{
|
||||
int i, arr[32 * 32 * 32];
|
||||
vec2 pt;
|
||||
|
||||
for (i = 0; i < 32 * 32 * 32; i++)
|
||||
arr[i] = i;
|
||||
|
||||
#pragma acc kernels copy(arr)
|
||||
{
|
||||
int j;
|
||||
|
||||
#pragma acc loop gang(num:32)
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
#pragma acc loop worker(num:32) private(pt)
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
int k;
|
||||
|
||||
pt.x = i ^ j * 3;
|
||||
pt.y = i | j * 5;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt.x * k;
|
||||
|
||||
#pragma acc loop vector(length:32)
|
||||
for (k = 0; k < 32; k++)
|
||||
arr[i * 1024 + j * 32 + k] += pt.y * k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
for (int j = 0; j < 32; j++)
|
||||
for (int k = 0; k < 32; k++)
|
||||
{
|
||||
int idx = i * 1024 + j * 32 + k;
|
||||
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,54 @@
|
||||
#include <assert.h>
|
||||
|
||||
/* Test of worker-private variables declared on loop directive, broadcasting
|
||||
to vector-partitioned mode. Array worker variable. */
|
||||
|
||||
int
main (int argc, char* argv[])
{
  int arr[32 * 32 * 32];
  int pt[2];
  int i;

  /* Seed every element with its own index.  */
  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

  /* "pt" is treated as "present_or_copy" on the kernels directive because it
     is an array variable.  */
#pragma acc kernels copy(arr)
  {
    int j;

#pragma acc loop gang(num:32)
    for (i = 0; i < 32; i++)
      {
        /* But here, it is made private per-worker.  */
#pragma acc loop worker(num:32) private(pt)
        for (j = 0; j < 32; j++)
          {
            int k;

            pt[0] = i ^ j * 3;

#pragma acc loop vector(length:32)
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += pt[0] * k;

            /* The second element is only written after the first vector
               loop has run.  */
            pt[1] = i | j * 5;

#pragma acc loop vector(length:32)
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += pt[1] * k;
          }
      }
  }

  /* Decompose each flat index back into (gang, worker, vector) coordinates
     and check that both contributions were added.  */
  for (int idx = 0; idx < 32 * 32 * 32; ++idx)
    {
      int g = idx / 1024;
      int w = (idx / 32) % 32;
      int v = idx % 32;

      assert (arr[idx] == idx + (g ^ w * 3) * v + (g | w * 5) * v);
    }

  return 0;
}
|
@ -0,0 +1,24 @@
|
||||
/* Verify that a simple, explicit acc loop reduction works inside
|
||||
a kernels region. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N 100
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i, red = 0;
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
#pragma acc loop reduction (+:red)
|
||||
for (i = 0; i < N; i++)
|
||||
red++;
|
||||
}
|
||||
|
||||
if (red != N)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,6 +1,3 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-O2" } */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <openacc.h>
|
||||
#include <gomp-constants.h>
|
||||
|
@ -74,6 +74,57 @@ void t2()
|
||||
}
|
||||
|
||||
|
||||
/* Test conditional vector-partitioned loops. */
|
||||
|
||||
void t3()
{
  int n[32], arr[1024], i;

  for (i = 0; i < 1024; i++)
    arr[i] = 0;

  for (i = 0; i < 32; i++)
    n[i] = 0;

  #pragma acc parallel copy(n, arr) \
		       num_gangs(32) num_workers(1) vector_length(32)
  {
    int j, k;

    /* Gang loop before the conditional region: bumps each n[j] once.  */
    #pragma acc loop gang(static:*)
    for (j = 0; j < 32; j++)
      n[j]++;

    /* Each gang iteration runs exactly one of two vector loops: even rows
       are incremented, odd rows are decremented.  */
    #pragma acc loop gang
    for (j = 0; j < 32; j++)
      {
	if ((j % 2) == 0)
	  {
	    #pragma acc loop vector
	    for (k = 0; k < 32; k++)
	      arr[j * 32 + k]++;
	  }
	else
	  {
	    #pragma acc loop vector
	    for (k = 0; k < 32; k++)
	      arr[j * 32 + k]--;
	  }
      }

    /* Gang loop after the conditional region: bumps each n[j] again.  */
    #pragma acc loop gang(static:*)
    for (j = 0; j < 32; j++)
      n[j]++;
  }

  for (i = 0; i < 32; i++)
    assert (n[i] == 2);

  /* Rows alternate +1 / -1 in blocks of 32 elements.  The conditional must
     be parenthesised: '==' binds tighter than '?:', so without the extra
     parentheses the assertion would evaluate to 1 or -1 and never fail.  */
  for (i = 0; i < 1024; i++)
    assert (arr[i] == (((i % 64) < 32) ? 1 : -1));
}
|
||||
|
||||
|
||||
/* Test conditions inside vector-partitioned loops. */
|
||||
|
||||
void t4()
|
||||
@ -156,6 +207,79 @@ void t5()
|
||||
}
|
||||
|
||||
|
||||
/* Test switch containing vector-partitioned loops inside gang-partitioned
|
||||
loops. */
|
||||
|
||||
void t6()
{
  int arr[1024];
  int n[32];
  int i;

  for (i = 0; i < 1024; ++i)
    arr[i] = 0;

  /* Selector values 0..4; the first gang loop below turns them into 1..5.  */
  for (i = 0; i < 32; ++i)
    n[i] = i % 5;

  #pragma acc parallel copy(n, arr) \
		       num_gangs(32) num_workers(1) vector_length(32)
  {
    int j, k;

    #pragma acc loop gang(static:*)
    for (j = 0; j < 32; j++)
      n[j]++;

    /* Each case adds its own label value to one 32-element row.  */
    #pragma acc loop gang(static:*)
    for (j = 0; j < 32; j++)
      switch (n[j])
	{
	case 1:
	  #pragma acc loop vector
	  for (k = 0; k < 32; k++)
	    arr[j * 32 + k] += 1;
	  break;

	case 2:
	  #pragma acc loop vector
	  for (k = 0; k < 32; k++)
	    arr[j * 32 + k] += 2;
	  break;

	case 3:
	  #pragma acc loop vector
	  for (k = 0; k < 32; k++)
	    arr[j * 32 + k] += 3;
	  break;

	case 4:
	  #pragma acc loop vector
	  for (k = 0; k < 32; k++)
	    arr[j * 32 + k] += 4;
	  break;

	case 5:
	  #pragma acc loop vector
	  for (k = 0; k < 32; k++)
	    arr[j * 32 + k] += 5;
	  break;

	default:
	  abort ();
	}

    #pragma acc loop gang(static:*)
    for (j = 0; j < 32; j++)
      n[j]++;
  }

  /* Each selector was incremented twice in total.  */
  for (i = 0; i < 32; ++i)
    assert (n[i] == (i % 5) + 2);

  /* Row 'row' holds the case label that was selected for it.  */
  for (int row = 0; row < 32; ++row)
    for (int col = 0; col < 32; ++col)
      assert (arr[row * 32 + col] == (row % 5) + 1);
}
|
||||
|
||||
|
||||
/* Test trivial operation of vector-single mode. */
|
||||
|
||||
void t7()
|
||||
@ -381,6 +505,100 @@ void t13()
|
||||
}
|
||||
|
||||
|
||||
/* Test condition in worker-partitioned mode. */
|
||||
|
||||
void t14()
{
  int arr[32 * 32 * 8];
  int i;

  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

  #pragma acc parallel copy(arr) \
		       num_gangs(8) num_workers(8) vector_length(32)
  {
    int j;
    #pragma acc loop gang
    for (j = 0; j < 32; j++)
      {
	int k;
	#pragma acc loop worker
	for (k = 0; k < 8; k++)
	  {
	    int m;
	    /* Even worker iterations add 1, odd ones add 2.  */
	    if ((k % 2) == 0)
	      {
		#pragma acc loop vector
		for (m = 0; m < 32; m++)
		  arr[j * 32 * 8 + k * 32 + m]++;
	      }
	    else
	      {
		#pragma acc loop vector
		for (m = 0; m < 32; m++)
		  arr[j * 32 * 8 + k * 32 + m] += 2;
	      }
	  }
      }
  }

  /* The parity of the 32-element chunk index selects the expected bump.  */
  for (i = 0; i < 32 * 32 * 8; ++i)
    {
      int bump = ((i / 32) % 2 == 0) ? 1 : 2;
      assert (arr[i] == i + bump);
    }
}
|
||||
|
||||
|
||||
/* Test switch in worker-partitioned mode. */
|
||||
|
||||
void t15()
{
  int arr[32 * 32 * 8];
  int i;

  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

  #pragma acc parallel copy(arr) \
		       num_gangs(8) num_workers(8) vector_length(32)
  {
    int j;
    #pragma acc loop gang
    for (j = 0; j < 32; j++)
      {
	int k;
	#pragma acc loop worker
	for (k = 0; k < 8; k++)
	  {
	    int m;
	    /* The selector value s adds s + 1 to the chunk.  */
	    switch ((j * 32 + k) % 3)
	      {
	      case 0:
		#pragma acc loop vector
		for (m = 0; m < 32; m++)
		  arr[j * 32 * 8 + k * 32 + m]++;
		break;

	      case 1:
		#pragma acc loop vector
		for (m = 0; m < 32; m++)
		  arr[j * 32 * 8 + k * 32 + m] += 2;
		break;

	      case 2:
		#pragma acc loop vector
		for (m = 0; m < 32; m++)
		  arr[j * 32 * 8 + k * 32 + m] += 3;
		break;

	      default: ;
	      }
	  }
      }
  }

  /* The chunk index is j * 8 + k, which is congruent to the selector
     j * 32 + k modulo 3 (both 8 and 32 leave remainder 2).  */
  for (i = 0; i < 32 * 32 * 8; ++i)
    {
      int sel = (i / 32) % 3;
      assert (arr[i] == i + sel + 1);
    }
}
|
||||
|
||||
|
||||
/* Test worker-single/worker-partitioned transitions. */
|
||||
|
||||
void t16()
|
||||
@ -790,6 +1008,53 @@ void t25()
|
||||
}
|
||||
|
||||
|
||||
/* Test multiple conditional vector-partitioned loops in worker-single
|
||||
mode. */
|
||||
|
||||
void t26()
{
  int arr[32 * 32];
  int i;

  for (i = 0; i < (int) (sizeof arr / sizeof arr[0]); ++i)
    arr[i] = i;

  #pragma acc parallel copy(arr) \
		       num_gangs(8) num_workers(8) vector_length(32)
  {
    int j;
    #pragma acc loop gang
    for (j = 0; j < 32; j++)
      {
	int k;
	/* Rows with j % 3 == 0 gain 3, rows with j % 3 == 1 gain 7, and the
	   remaining rows are left untouched.  */
	if ((j % 3) == 0)
	  {
	    #pragma acc loop vector
	    for (k = 0; k < 32; k++)
	      {
		#pragma acc atomic
		arr[j * 32 + k] += 3;
	      }
	  }
	else if ((j % 3) == 1)
	  {
	    #pragma acc loop vector
	    for (k = 0; k < 32; k++)
	      {
		#pragma acc atomic
		arr[j * 32 + k] += 7;
	      }
	  }
      }
  }

  for (i = 0; i < 32 * 32; ++i)
    {
      int expected = i;

      switch ((i / 32) % 3)
	{
	case 0:
	  expected += 3;
	  break;
	case 1:
	  expected += 7;
	  break;
	default:
	  break;
	}

      assert (arr[i] == expected);
    }
}
|
||||
|
||||
|
||||
/* Test worker-single, vector-partitioned, gang-redundant mode. */
|
||||
|
||||
#define ACTUAL_GANGS 8
|
||||
@ -869,8 +1134,10 @@ int main()
|
||||
{
|
||||
t1();
|
||||
t2();
|
||||
t3();
|
||||
t4();
|
||||
t5();
|
||||
t6();
|
||||
t7();
|
||||
t8();
|
||||
t9();
|
||||
@ -878,6 +1145,8 @@ int main()
|
||||
t11();
|
||||
t12();
|
||||
t13();
|
||||
t14();
|
||||
t15();
|
||||
t16();
|
||||
t17();
|
||||
t18();
|
||||
@ -888,6 +1157,7 @@ int main()
|
||||
t23();
|
||||
t24();
|
||||
t25();
|
||||
t26();
|
||||
t27();
|
||||
t28();
|
||||
|
||||
|
@ -0,0 +1,38 @@
|
||||
/* { dg-do run } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define PK parallel
|
||||
#define M(x, y, z) O(x, y, z)
|
||||
#define O(x, y, z) x ## _ ## y ## _ ## z
|
||||
|
||||
#define F
|
||||
#define G none
|
||||
#define L
|
||||
#include "parallel-loop-1.h"
|
||||
#undef L
|
||||
#undef F
|
||||
#undef G
|
||||
|
||||
#define F num_gangs (10)
|
||||
#define G gangs
|
||||
#define L gang
|
||||
#include "parallel-loop-1.h"
|
||||
#undef L
|
||||
#undef F
|
||||
#undef G
|
||||
|
||||
int
main ()
{
  /* Run every generated variant in order; the '||' chain stops at the first
     one that reports a failure (non-zero).  */
  int failed = (test_none_none ()
		|| test_none_auto ()
		|| test_none_independent ()
		|| test_none_seq ()
		|| test_gangs_none ()
		|| test_gangs_auto ()
		|| test_gangs_independent ()
		|| test_gangs_seq ());

  if (failed)
    abort ();

  return 0;
}
|
@ -0,0 +1,20 @@
|
||||
#define S
|
||||
#define N(x) M(x, G, none)
|
||||
#include "parallel-loop-2.h"
|
||||
#undef S
|
||||
#undef N
|
||||
#define S auto
|
||||
#define N(x) M(x, G, auto)
|
||||
#include "parallel-loop-2.h"
|
||||
#undef S
|
||||
#undef N
|
||||
#define S independent
|
||||
#define N(x) M(x, G, independent)
|
||||
#include "parallel-loop-2.h"
|
||||
#undef S
|
||||
#undef N
|
||||
#define S seq
|
||||
#define N(x) M(x, G, seq)
|
||||
#include "parallel-loop-2.h"
|
||||
#undef S
|
||||
#undef N
|
280
libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h
Normal file
280
libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h
Normal file
@ -0,0 +1,280 @@
|
||||
#ifndef VARS
|
||||
#define VARS
|
||||
int a[1500];
|
||||
float b[10][15][10];
|
||||
/* Never returns: spins forever.  Only referenced from loop bodies in this
   file.  */
#pragma acc routine
__attribute__((noreturn)) void
noreturn (void)
{
  while (1)
    ;
}
|
||||
#endif
|
||||
#ifndef SC
|
||||
#define SC
|
||||
#endif
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f0) (void)
|
||||
{
|
||||
int i;
|
||||
#pragma acc PK loop L F
|
||||
for (i = 0; i < 1500; i++)
|
||||
a[i] += 2;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f1) (void)
|
||||
{
|
||||
#pragma acc PK loop L F
|
||||
for (unsigned int i = __INT_MAX__; i < 3000U + __INT_MAX__; i += 2)
|
||||
a[(i - __INT_MAX__) >> 1] -= 2;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f2) (void)
|
||||
{
|
||||
unsigned long long i;
|
||||
#pragma acc PK loop L F
|
||||
for (i = __LONG_LONG_MAX__ + 4500ULL - 27;
|
||||
i > __LONG_LONG_MAX__ - 27ULL; i -= 3)
|
||||
a[(i + 26LL - __LONG_LONG_MAX__) / 3] -= 4;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f3) (long long n1, long long n2, long long s3)
|
||||
{
|
||||
#pragma acc PK loop L F
|
||||
for (long long i = n1 + 23; i > n2 - 25; i -= s3)
|
||||
a[i + 48] += 7;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f4) (void)
|
||||
{
|
||||
unsigned int i;
|
||||
#pragma acc PK loop L F
|
||||
for (i = 30; i < 20; i += 2)
|
||||
a[i] += 10;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f5) (int n11, int n12, int n21, int n22, int n31, int n32,
|
||||
int s1, int s2, int s3)
|
||||
{
|
||||
SC int v1, v2, v3;
|
||||
#pragma acc PK loop L F
|
||||
for (v1 = n11; v1 < n12; v1 += s1)
|
||||
#pragma acc loop S
|
||||
for (v2 = n21; v2 < n22; v2 += s2)
|
||||
for (v3 = n31; v3 < n32; v3 += s3)
|
||||
b[v1][v2][v3] += 2.5;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f6) (int n11, int n12, int n21, int n22, long long n31, long long n32,
|
||||
int s1, int s2, long long int s3)
|
||||
{
|
||||
SC int v1, v2;
|
||||
SC long long v3;
|
||||
#pragma acc PK loop L F
|
||||
for (v1 = n11; v1 > n12; v1 += s1)
|
||||
#pragma acc loop S
|
||||
for (v2 = n21; v2 > n22; v2 += s2)
|
||||
for (v3 = n31; v3 > n32; v3 += s3)
|
||||
b[v1][v2 / 2][v3] -= 4.5;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f7) (void)
|
||||
{
|
||||
SC unsigned int v1, v3;
|
||||
SC unsigned long long v2;
|
||||
#pragma acc PK loop L F
|
||||
for (v1 = 0; v1 < 20; v1 += 2)
|
||||
#pragma acc loop S
|
||||
for (v2 = __LONG_LONG_MAX__ + 16ULL;
|
||||
v2 > __LONG_LONG_MAX__ - 29ULL; v2 -= 3)
|
||||
for (v3 = 10; v3 > 0; v3--)
|
||||
b[v1 >> 1][(v2 - __LONG_LONG_MAX__ + 64) / 3 - 12][v3 - 1] += 5.5;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f8) (void)
|
||||
{
|
||||
SC long long v1, v2, v3;
|
||||
#pragma acc PK loop L F
|
||||
for (v1 = 0; v1 < 20; v1 += 2)
|
||||
#pragma acc loop S
|
||||
for (v2 = 30; v2 < 20; v2++)
|
||||
for (v3 = 10; v3 < 0; v3--)
|
||||
b[v1][v2][v3] += 5.5;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f9) (void)
|
||||
{
|
||||
int i;
|
||||
#pragma acc PK loop L F
|
||||
for (i = 20; i < 10; i++)
|
||||
{
|
||||
a[i] += 2;
|
||||
noreturn ();
|
||||
a[i] -= 4;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f10) (void)
|
||||
{
|
||||
SC int i;
|
||||
#pragma acc PK loop L F
|
||||
for (i = 0; i < 10; i++)
|
||||
#pragma acc loop S
|
||||
for (int j = 10; j < 8; j++)
|
||||
for (long k = -10; k < 10; k++)
|
||||
{
|
||||
b[i][j][k] += 4;
|
||||
noreturn ();
|
||||
b[i][j][k] -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f11) (int n)
|
||||
{
|
||||
int i;
|
||||
#pragma acc PK loop L F
|
||||
for (i = 20; i < n; i++)
|
||||
{
|
||||
a[i] += 8;
|
||||
noreturn ();
|
||||
a[i] -= 16;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f12) (int n)
|
||||
{
|
||||
SC int i;
|
||||
#pragma acc PK loop L F
|
||||
for (i = 0; i < 10; i++)
|
||||
#pragma acc loop S
|
||||
for (int j = n; j < 8; j++)
|
||||
for (long k = -10; k < 10; k++)
|
||||
{
|
||||
b[i][j][k] += 16;
|
||||
noreturn ();
|
||||
b[i][j][k] -= 32;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f13) (void)
|
||||
{
|
||||
int *i;
|
||||
#pragma acc PK loop L F
|
||||
for (i = a; i < &a[1500]; i++)
|
||||
i[0] += 2;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
N(f14) (void)
|
||||
{
|
||||
SC float *i;
|
||||
#pragma acc PK loop L F
|
||||
for (i = &b[0][0][0]; i < &b[0][0][10]; i++)
|
||||
#pragma acc loop S
|
||||
for (float *j = &b[0][15][0]; j > &b[0][0][0]; j -= 10)
|
||||
for (float *k = &b[0][0][10]; k > &b[0][0][0]; --k)
|
||||
b[i - &b[0][0][0]][(j - &b[0][0][0]) / 10 - 1][(k - &b[0][0][0]) - 1]
|
||||
-= 3.5;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) int
|
||||
N(test) (void)
|
||||
{
|
||||
int i, j, k;
|
||||
for (i = 0; i < 1500; i++)
|
||||
a[i] = i - 25;
|
||||
N(f0) ();
|
||||
for (i = 0; i < 1500; i++)
|
||||
if (a[i] != i - 23)
|
||||
return 1;
|
||||
N(f1) ();
|
||||
for (i = 0; i < 1500; i++)
|
||||
if (a[i] != i - 25)
|
||||
return 1;
|
||||
N(f2) ();
|
||||
for (i = 0; i < 1500; i++)
|
||||
if (a[i] != i - 29)
|
||||
return 1;
|
||||
N(f3) (1500LL - 1 - 23 - 48, -1LL + 25 - 48, 1LL);
|
||||
for (i = 0; i < 1500; i++)
|
||||
if (a[i] != i - 22)
|
||||
return 1;
|
||||
N(f3) (1500LL - 1 - 23 - 48, 1500LL - 1, 7LL);
|
||||
for (i = 0; i < 1500; i++)
|
||||
if (a[i] != i - 22)
|
||||
return 1;
|
||||
N(f4) ();
|
||||
for (i = 0; i < 1500; i++)
|
||||
if (a[i] != i - 22)
|
||||
return 1;
|
||||
for (i = 0; i < 10; i++)
|
||||
for (j = 0; j < 15; j++)
|
||||
for (k = 0; k < 10; k++)
|
||||
b[i][j][k] = i - 2.5 + 1.5 * j - 1.5 * k;
|
||||
N(f5) (0, 10, 0, 15, 0, 10, 1, 1, 1);
|
||||
for (i = 0; i < 10; i++)
|
||||
for (j = 0; j < 15; j++)
|
||||
for (k = 0; k < 10; k++)
|
||||
if (b[i][j][k] != i + 1.5 * j - 1.5 * k)
|
||||
return 1;
|
||||
N(f5) (0, 10, 30, 15, 0, 10, 4, 5, 6);
|
||||
for (i = 0; i < 10; i++)
|
||||
for (j = 0; j < 15; j++)
|
||||
for (k = 0; k < 10; k++)
|
||||
if (b[i][j][k] != i + 1.5 * j - 1.5 * k)
|
||||
return 1;
|
||||
N(f6) (9, -1, 29, 0, 9, -1, -1, -2, -1);
|
||||
for (i = 0; i < 10; i++)
|
||||
for (j = 0; j < 15; j++)
|
||||
for (k = 0; k < 10; k++)
|
||||
if (b[i][j][k] != i - 4.5 + 1.5 * j - 1.5 * k)
|
||||
return 1;
|
||||
N(f7) ();
|
||||
for (i = 0; i < 10; i++)
|
||||
for (j = 0; j < 15; j++)
|
||||
for (k = 0; k < 10; k++)
|
||||
if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k)
|
||||
return 1;
|
||||
N(f8) ();
|
||||
for (i = 0; i < 10; i++)
|
||||
for (j = 0; j < 15; j++)
|
||||
for (k = 0; k < 10; k++)
|
||||
if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k)
|
||||
return 1;
|
||||
N(f9) ();
|
||||
N(f10) ();
|
||||
N(f11) (10);
|
||||
N(f12) (12);
|
||||
for (i = 0; i < 1500; i++)
|
||||
if (a[i] != i - 22)
|
||||
return 1;
|
||||
for (i = 0; i < 10; i++)
|
||||
for (j = 0; j < 15; j++)
|
||||
for (k = 0; k < 10; k++)
|
||||
if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k)
|
||||
return 1;
|
||||
N(f13) ();
|
||||
N(f14) ();
|
||||
for (i = 0; i < 1500; i++)
|
||||
if (a[i] != i - 20)
|
||||
return 1;
|
||||
for (i = 0; i < 10; i++)
|
||||
for (j = 0; j < 15; j++)
|
||||
for (k = 0; k < 10; k++)
|
||||
if (b[i][j][k] != i - 2.5 + 1.5 * j - 1.5 * k)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
16
libgomp/testsuite/libgomp.oacc-fortran/cublas-fixed.h
Normal file
16
libgomp/testsuite/libgomp.oacc-fortran/cublas-fixed.h
Normal file
@ -0,0 +1,16 @@
|
||||
! CUDA BLAS interface binding for SAXPY.
!
! Fixed-form include fragment: statements start in column 7 and the
! continuation marker sits in column 6.  Declares an explicit interface
! that binds the Fortran name cublassaxpy to the C symbol "cublasSaxpy".
! The scalar arguments are passed by value; x and y are assumed-type,
! assumed-size arrays.
! NOTE(review): x and y are presumably device addresses supplied by the
! including test - confirm against the caller.

      use iso_c_binding
      interface
        subroutine cublassaxpy(N, alpha, x, incx, y, incy)
     &    bind(c, name="cublasSaxpy")
          use iso_c_binding
          integer(kind=c_int), value :: N
          real(kind=c_float), value :: alpha
          type(*), dimension(*) :: x
          integer(kind=c_int), value :: incx
          type(*), dimension(*) :: y
          integer(kind=c_int), value :: incy
        end subroutine cublassaxpy
      end interface
|
||||
|
@ -1,45 +1,212 @@
|
||||
! { dg-do run }
|
||||
! { dg-additional-options "-cpp" }
|
||||
|
||||
program test
|
||||
integer, parameter :: N = 8
|
||||
real, allocatable :: a(:), b(:)
|
||||
function is_mapped (n) result (rc)
|
||||
use openacc
|
||||
|
||||
allocate (a(N))
|
||||
allocate (b(N))
|
||||
integer, intent (in) :: n
|
||||
logical rc
|
||||
|
||||
a(:) = 3.0
|
||||
b(:) = 0.0
|
||||
#if ACC_MEM_SHARED
|
||||
integer i
|
||||
|
||||
!$acc enter data copyin (a(1:N), b(1:N))
|
||||
rc = .TRUE.
|
||||
i = n
|
||||
#else
|
||||
rc = acc_is_present (n, sizeof (n))
|
||||
#endif
|
||||
|
||||
!$acc parallel
|
||||
do i = 1, n
|
||||
b(i) = a (i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
end function is_mapped
|
||||
|
||||
!$acc exit data copyout (a(1:N), b(1:N))
|
||||
program main
|
||||
integer i, j
|
||||
logical is_mapped
|
||||
|
||||
do i = 1, n
|
||||
if (a(i) .ne. 3.0) STOP 1
|
||||
if (b(i) .ne. 3.0) STOP 2
|
||||
end do
|
||||
i = -1
|
||||
j = -2
|
||||
|
||||
a(:) = 5.0
|
||||
b(:) = 1.0
|
||||
!$acc data copyin (i, j)
|
||||
if (is_mapped (i) .eqv. .FALSE.) call abort
|
||||
if (is_mapped (j) .eqv. .FALSE.) call abort
|
||||
|
||||
!$acc enter data copyin (a(1:N), b(1:N))
|
||||
if (i .ne. -1 .or. j .ne. -2) call abort
|
||||
|
||||
!$acc parallel
|
||||
do i = 1, n
|
||||
b(i) = a (i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
i = 2
|
||||
j = 1
|
||||
|
||||
!$acc exit data copyout (a(1:N), b(1:N))
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
!$acc end data
|
||||
|
||||
do i = 1, n
|
||||
if (a(i) .ne. 5.0) STOP 3
|
||||
if (b(i) .ne. 5.0) STOP 4
|
||||
end do
|
||||
end program test
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
|
||||
i = -1
|
||||
j = -2
|
||||
|
||||
!$acc data copyout (i, j)
|
||||
if (is_mapped (i) .eqv. .FALSE.) call abort
|
||||
if (is_mapped (j) .eqv. .FALSE.) call abort
|
||||
|
||||
if (i .ne. -1 .or. j .ne. -2) call abort
|
||||
|
||||
i = 2
|
||||
j = 1
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
|
||||
!$acc parallel present (i, j)
|
||||
i = 4
|
||||
j = 2
|
||||
!$acc end parallel
|
||||
!$acc end data
|
||||
|
||||
if (i .ne. 4 .or. j .ne. 2) call abort
|
||||
|
||||
i = -1
|
||||
j = -2
|
||||
|
||||
!$acc data create (i, j)
|
||||
if (is_mapped (i) .eqv. .FALSE.) call abort
|
||||
if (is_mapped (j) .eqv. .FALSE.) call abort
|
||||
|
||||
if (i .ne. -1 .or. j .ne. -2) call abort
|
||||
|
||||
i = 2
|
||||
j = 1
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
!$acc end data
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
|
||||
i = -1
|
||||
j = -2
|
||||
|
||||
!$acc data present_or_copyin (i, j)
|
||||
if (is_mapped (i) .eqv. .FALSE.) call abort
|
||||
if (is_mapped (j) .eqv. .FALSE.) call abort
|
||||
|
||||
if (i .ne. -1 .or. j .ne. -2) call abort
|
||||
|
||||
i = 2
|
||||
j = 1
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
!$acc end data
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
|
||||
i = -1
|
||||
j = -2
|
||||
|
||||
!$acc data present_or_copyout (i, j)
|
||||
if (is_mapped (i) .eqv. .FALSE.) call abort
|
||||
if (is_mapped (j) .eqv. .FALSE.) call abort
|
||||
|
||||
if (i .ne. -1 .or. j .ne. -2) call abort
|
||||
|
||||
i = 2
|
||||
j = 1
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
|
||||
!$acc parallel present (i, j)
|
||||
i = 4
|
||||
j = 2
|
||||
!$acc end parallel
|
||||
!$acc end data
|
||||
|
||||
if (i .ne. 4 .or. j .ne. 2) call abort
|
||||
|
||||
i = -1
|
||||
j = -2
|
||||
|
||||
!$acc data present_or_copy (i, j)
|
||||
if (is_mapped (i) .eqv. .FALSE.) call abort
|
||||
if (is_mapped (j) .eqv. .FALSE.) call abort
|
||||
|
||||
if (i .ne. -1 .or. j .ne. -2) call abort
|
||||
|
||||
i = 2
|
||||
j = 1
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
!$acc end data
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
#else
|
||||
if (i .ne. -1 .or. j .ne. -2) call abort
|
||||
#endif
|
||||
|
||||
i = -1
|
||||
j = -2
|
||||
|
||||
!$acc data present_or_create (i, j)
|
||||
if (is_mapped (i) .eqv. .FALSE.) call abort
|
||||
if (is_mapped (j) .eqv. .FALSE.) call abort
|
||||
|
||||
i = 2
|
||||
j = 1
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
!$acc end data
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
|
||||
i = -1
|
||||
j = -2
|
||||
|
||||
!$acc data copyin (i, j)
|
||||
!$acc data present (i, j)
|
||||
if (is_mapped (i) .eqv. .FALSE.) call abort
|
||||
if (is_mapped (j) .eqv. .FALSE.) call abort
|
||||
|
||||
if (i .ne. -1 .or. j .ne. -2) call abort
|
||||
|
||||
i = 2
|
||||
j = 1
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
!$acc end data
|
||||
!$acc end data
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
|
||||
i = -1
|
||||
j = -2
|
||||
|
||||
!$acc data copyin (i, j)
|
||||
!$acc data present (i, j)
|
||||
if (is_mapped (i) .eqv. .FALSE.) call abort
|
||||
if (is_mapped (j) .eqv. .FALSE.) call abort
|
||||
|
||||
if (i .ne. -1 .or. j .ne. -2) call abort
|
||||
|
||||
i = 2
|
||||
j = 1
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
!$acc end data
|
||||
!$acc end data
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
|
||||
i = -1
|
||||
j = -2
|
||||
|
||||
!$acc data
|
||||
#if !ACC_MEM_SHARED
|
||||
if (is_mapped (i) .eqv. .TRUE.) call abort
|
||||
if (is_mapped (j) .eqv. .TRUE.) call abort
|
||||
#endif
|
||||
if (i .ne. -1 .or. j .ne. -2) call abort
|
||||
|
||||
i = 2
|
||||
j = 1
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
!$acc end data
|
||||
|
||||
if (i .ne. 2 .or. j .ne. 1) call abort
|
||||
|
||||
end program main
|
||||
|
@ -1,8 +1,14 @@
|
||||
! { dg-do run }
|
||||
|
||||
program test
|
||||
use openacc
|
||||
integer, parameter :: N = 8
|
||||
real, allocatable :: a(:,:), b(:,:)
|
||||
real, allocatable :: c(:), d(:)
|
||||
integer i, j
|
||||
|
||||
i = 0
|
||||
j = 0
|
||||
|
||||
allocate (a(N,N))
|
||||
allocate (b(N,N))
|
||||
@ -28,4 +34,48 @@ program test
|
||||
if (b(j,i) .ne. 3.0) STOP 2
|
||||
end do
|
||||
end do
|
||||
|
||||
allocate (c(N))
|
||||
allocate (d(N))
|
||||
|
||||
c(:) = 3.0
|
||||
d(:) = 0.0
|
||||
|
||||
!$acc enter data copyin (c(1:N)) create (d(1:N)) async
|
||||
!$acc wait
|
||||
|
||||
!$acc parallel
|
||||
do i = 1, N
|
||||
d(i) = c(i) + 1
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc exit data copyout (c(1:N), d(1:N)) async
|
||||
!$acc wait
|
||||
|
||||
do i = 1, N
|
||||
if (d(i) .ne. 4.0) call abort
|
||||
end do
|
||||
|
||||
c(:) = 3.0
|
||||
d(:) = 0.0
|
||||
|
||||
!$acc enter data copyin (c(1:N)) async
|
||||
!$acc enter data create (d(1:N)) wait
|
||||
!$acc wait
|
||||
|
||||
!$acc parallel
|
||||
do i = 1, N
|
||||
d(i) = c(i) + 1
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
!$acc exit data copyout (d(1:N)) async
|
||||
!$acc exit data async
|
||||
!$acc wait
|
||||
|
||||
do i = 1, N
|
||||
if (d(i) .ne. 4.0) call abort
|
||||
end do
|
||||
|
||||
end program test
|
||||
|
28
libgomp/testsuite/libgomp.oacc-fortran/dummy-array.f90
Normal file
28
libgomp/testsuite/libgomp.oacc-fortran/dummy-array.f90
Normal file
@ -0,0 +1,28 @@
|
||||
! Ensure that dummy arrays are transferred to the accelerator
|
||||
! via an implicit pcopy.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! Host driver: poison the array, let dummy_array fill it inside an
  ! OpenACC parallel loop, then confirm each element holds its own index.
  integer, parameter :: n = 1000
  integer :: a(n)
  integer :: i

  a = -1

  call dummy_array (a, n)

  if (any (a /= [(i, i = 1, n)])) call abort
end program main
|
||||
|
||||
subroutine dummy_array (a, n)
  ! Store i in a(i) for i = 1 .. n inside a gang-partitioned parallel loop.
  ! The dummy array carries no explicit data clause on the construct.
  integer :: n, a(n)
  integer :: i

  !$acc parallel loop num_gangs (100) gang
  do i = 1, n
    a(i) = i
  end do
  !$acc end parallel loop
end subroutine
|
98
libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90
Normal file
98
libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90
Normal file
@ -0,0 +1,98 @@
|
||||
! Test host_data interoperability with CUDA blas. This test was
|
||||
! derived from libgomp.oacc-c-c++-common/host_data-1.c.
|
||||
|
||||
! { dg-do run { target openacc_nvidia_accel_selected } }
|
||||
! { dg-additional-options "-lcublas -Wall -Wextra" }
|
||||
|
||||
program test
  ! Runs y = a*x + y four ways (cuBLAS through host_data twice, then the
  ! saxpy routine inside parallel regions twice) and compares each result
  ! with a host-computed reference.
  implicit none

  integer, parameter :: N = 10
  integer :: i
  real*4 :: x_ref(N), y_ref(N), x(N), y(N), a

  interface
    subroutine cublassaxpy(N, alpha, x, incx, y, incy) bind(c, name="cublasSaxpy")
      use iso_c_binding
      integer(kind=c_int), value :: N
      real(kind=c_float), value :: alpha
      type(*), dimension(*) :: x
      integer(kind=c_int), value :: incx
      type(*), dimension(*) :: y
      integer(kind=c_int), value :: incy
    end subroutine cublassaxpy
  end interface

  a = 2.0

  do i = 1, N
    x(i) = 4.0 * i
    y(i) = 3.0
    x_ref(i) = x(i)
    y_ref(i) = y(i)
  end do

  ! Host reference result.
  call saxpy (N, a, x_ref, y_ref)

  ! Case 1: x and y copied in, cuBLAS works on the device addresses.
  !$acc data copyin (x) copy (y)
  !$acc host_data use_device (x, y)
  call cublassaxpy(N, a, x, 1, y, 1)
  !$acc end host_data
  !$acc end data

  call validate_results (N, y, y_ref)

  ! Case 2: x is only created and y is only copied out, so both inputs
  ! have to be defined on the device before cuBLAS reads them.
  !$acc data create (x) copyout (y)
  !$acc parallel loop
  do i = 1, N
    x(i) = 4.0 * i
    y(i) = 3.0
  end do
  !$acc end parallel loop

  !$acc host_data use_device (x, y)
  call cublassaxpy(N, a, x, 1, y, 1)
  !$acc end host_data
  !$acc end data

  call validate_results (N, y, y_ref)

  ! Case 3: saxpy called inside a parallel region nested in a data region.
  y(:) = 3.0

  !$acc data copyin (x) copyin (a) copy (y)
  !$acc parallel present (x) pcopy (y) present (a)
  call saxpy (N, a, x, y)
  !$acc end parallel
  !$acc end data

  call validate_results (N, y, y_ref)

  ! Case 4: same computation with unstructured enter/exit data.
  y(:) = 3.0

  !$acc enter data copyin (x, a, y)
  !$acc parallel present (x) pcopy (y) present (a)
  call saxpy (N, a, x, y)
  !$acc end parallel
  !$acc exit data delete (x, a) copyout (y)

  call validate_results (N, y, y_ref)
end program test
|
||||
|
||||
subroutine saxpy (nn, aa, xx, yy)
  ! In-place single-precision update yy = yy + aa * xx over nn elements.
  ! Marked as an OpenACC routine.
  integer :: nn
  real*4 :: aa, xx(nn), yy(nn)
  integer k
  !$acc routine

  do k = 1, nn
    yy(k) = yy(k) + aa * xx(k)
  end do
end subroutine saxpy
|
||||
|
||||
subroutine validate_results (n, a, b)
  ! Abort unless a and b agree element-wise to within an absolute
  ! tolerance of 0.0001 over the first n elements.
  integer :: n
  real*4 :: a(n), b(n)
  real*4, parameter :: tol = 0.0001
  integer :: i   ! previously relied on implicit typing

  do i = 1, n
    if (abs (a(i) - b(i)) > tol) call abort
  end do
end subroutine validate_results
|
85
libgomp/testsuite/libgomp.oacc-fortran/host_data-3.f
Normal file
85
libgomp/testsuite/libgomp.oacc-fortran/host_data-3.f
Normal file
@ -0,0 +1,85 @@
|
||||
! Fixed-mode host_data interaction with CUDA BLAS.
|
||||
|
||||
! { dg-do run { target openacc_nvidia_accel_selected } }
|
||||
! { dg-additional-options "-lcublas -Wall -Wextra" }
|
||||
|
||||
! Fixed-form source: statements start in column 7, directive sentinels
! in column 1.  cublassaxpy comes from the include file.
      include "cublas-fixed.h"

      integer, parameter :: N = 10
      integer :: i
      real*4 :: x_ref(N), y_ref(N), x(N), y(N), a

      a = 2.0

      do i = 1, N
         x(i) = 4.0 * i
         y(i) = 3.0
         x_ref(i) = x(i)
         y_ref(i) = y(i)
      end do

! Host reference result.
      call saxpy (N, a, x_ref, y_ref)

! Case 1: x and y copied in, cuBLAS works on the device addresses.
!$acc data copyin (x) copy (y)
!$acc host_data use_device (x, y)
      call cublassaxpy(N, a, x, 1, y, 1)
!$acc end host_data
!$acc end data

      call validate_results (N, y, y_ref)

! Case 2: x is only created and y is only copied out, so both inputs
! have to be defined on the device before cuBLAS reads them.
!$acc data create (x) copyout (y)
!$acc parallel loop
      do i = 1, N
         x(i) = 4.0 * i
         y(i) = 3.0
      end do
!$acc end parallel loop

!$acc host_data use_device (x, y)
      call cublassaxpy(N, a, x, 1, y, 1)
!$acc end host_data
!$acc end data

      call validate_results (N, y, y_ref)

! Case 3: saxpy inside a parallel region nested in a data region.
      y(:) = 3.0

!$acc data copyin (x) copyin (a) copy (y)
!$acc parallel present (x) pcopy (y) present (a)
      call saxpy (N, a, x, y)
!$acc end parallel
!$acc end data

      call validate_results (N, y, y_ref)

! Case 4: same computation with unstructured enter/exit data.
      y(:) = 3.0

!$acc enter data copyin (x, a, y)
!$acc parallel present (x) pcopy (y) present (a)
      call saxpy (N, a, x, y)
!$acc end parallel
!$acc exit data delete (x, a) copyout (y)

      call validate_results (N, y, y_ref)
      end
|
||||
|
||||
! In-place single-precision update yy = yy + aa * xx over nn elements.
      subroutine saxpy (nn, aa, xx, yy)
      integer :: nn
      real*4 :: aa, xx(nn), yy(nn)
      integer k
!$acc routine

      do k = 1, nn
         yy(k) = yy(k) + aa * xx(k)
      end do
      end subroutine saxpy
|
||||
|
||||
! Abort unless a and b agree element-wise to within 0.0001 over the
! first n elements.
      subroutine validate_results (n, a, b)
      integer :: n
      real*4 :: a(n), b(n)
      real*4, parameter :: tol = 0.0001
! The loop index previously relied on implicit typing.
      integer :: i

      do i = 1, n
         if (abs (a(i) - b(i)) > tol) call abort
      end do
      end subroutine validate_results
|
||||
|
101
libgomp/testsuite/libgomp.oacc-fortran/host_data-4.f90
Normal file
101
libgomp/testsuite/libgomp.oacc-fortran/host_data-4.f90
Normal file
@ -0,0 +1,101 @@
|
||||
! Test host_data interoperability with CUDA blas using modules.
|
||||
|
||||
! { dg-do run { target openacc_nvidia_accel_selected } }
|
||||
! { dg-additional-options "-lcublas -Wall -Wextra" }
|
||||
|
||||
module cublas
  ! Binding for cublasSaxpy plus the host/device saxpy reference routine
  ! and the result checker used by the test program.
  interface
    subroutine cublassaxpy(N, alpha, x, incx, y, incy) bind(c, name="cublasSaxpy")
      use iso_c_binding
      integer(kind=c_int), value :: N
      real(kind=c_float), value :: alpha
      type(*), dimension(*) :: x
      integer(kind=c_int), value :: incx
      type(*), dimension(*) :: y
      integer(kind=c_int), value :: incy
    end subroutine cublassaxpy
  end interface

contains

  ! In-place update yy = yy + aa * xx over nn elements.
  subroutine saxpy (nn, aa, xx, yy)
    integer :: nn
    real*4 :: aa, xx(nn), yy(nn)
    integer k
    !$acc routine

    do k = 1, nn
      yy(k) = yy(k) + aa * xx(k)
    end do
  end subroutine saxpy

  ! Abort if any of the first n elements of a and b differ by more
  ! than 0.0001.
  subroutine validate_results (n, a, b)
    integer :: n
    real*4 :: a(n), b(n)
    integer :: i

    do i = 1, n
      if (abs (a(i) - b(i)) > 0.0001) then
        call abort
      end if
    end do
  end subroutine validate_results
end module cublas
|
||||
|
||||
program test
  ! Runs y = a*x + y four ways (cuBLAS through host_data twice, then the
  ! module's saxpy inside parallel regions twice) and compares each
  ! result with a host-computed reference.
  use cublas
  implicit none

  integer, parameter :: N = 10
  integer :: i
  real*4 :: x_ref(N), y_ref(N), x(N), y(N), a

  a = 2.0

  do i = 1, N
    x(i) = 4.0 * i
    y(i) = 3.0
    x_ref(i) = x(i)
    y_ref(i) = y(i)
  end do

  ! Host reference result.
  call saxpy (N, a, x_ref, y_ref)

  ! Case 1: x and y copied in, cuBLAS works on the device addresses.
  !$acc data copyin (x) copy (y)
  !$acc host_data use_device (x, y)
  call cublassaxpy(N, a, x, 1, y, 1)
  !$acc end host_data
  !$acc end data

  call validate_results (N, y, y_ref)

  ! Case 2: x is only created and y is only copied out, so both inputs
  ! have to be defined on the device before cuBLAS reads them.
  !$acc data create (x) copyout (y)
  !$acc parallel loop
  do i = 1, N
    x(i) = 4.0 * i
    y(i) = 3.0
  end do
  !$acc end parallel loop

  !$acc host_data use_device (x, y)
  call cublassaxpy(N, a, x, 1, y, 1)
  !$acc end host_data
  !$acc end data

  call validate_results (N, y, y_ref)

  ! Case 3: saxpy called inside a parallel region nested in a data region.
  y(:) = 3.0

  !$acc data copyin (x) copyin (a) copy (y)
  !$acc parallel present (x) pcopy (y) present (a)
  call saxpy (N, a, x, y)
  !$acc end parallel
  !$acc end data

  call validate_results (N, y, y_ref)

  ! Case 4: same computation with unstructured enter/exit data.
  y(:) = 3.0

  !$acc enter data copyin (x, a, y)
  !$acc parallel present (x) pcopy (y) present (a)
  call saxpy (N, a, x, y)
  !$acc end parallel
  !$acc exit data delete (x, a) copyout (y)

  call validate_results (N, y, y_ref)
end program test
|
@ -0,0 +1,26 @@
|
||||
program foo
  ! bar adds one to vol in each iteration of a 2x2 loop nest, so a
  ! counter that starts at zero must come back as 4.
  implicit none
  integer :: vol = 0

  call bar (vol)

  if (vol /= 4) call abort
end program foo
|
||||
|
||||
subroutine bar(vol)
  ! Increment vol once per iteration of a 2x2 loop nest inside a kernels
  ! region; both loop levels carry a sum reduction on vol.
  implicit none

  integer :: vol
  integer :: j, k

  !$acc kernels
  !$acc loop reduction(+:vol)
  do k = 1, 2
    !$acc loop reduction(+:vol)
    do j = 1, 2
      vol = vol + 1
    end do
  end do
  !$acc end kernels
end subroutine bar
|
@ -0,0 +1,21 @@
|
||||
program foo
  ! bar adds one to vol in each of two loop iterations, so a counter
  ! that starts at zero must come back as 2.
  implicit none
  integer :: vol = 0

  call bar (vol)

  if (vol /= 2) call abort
end program foo
|
||||
|
||||
subroutine bar(vol)
  ! Increment vol once per iteration of a two-trip loop inside a kernels
  ! region, using a sum reduction on vol.
  implicit none
  integer :: vol
  integer :: j

  !$acc kernels
  !$acc loop reduction(+:vol)
  do j = 1, 2
    vol = vol + 1
  end do
  !$acc end kernels
end subroutine bar
|
@ -0,0 +1,30 @@
|
||||
! Test the collapse clause inside a kernels region.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program collapse3
  ! Two collapse(3) nests that mix labelled and construct-named DO loops;
  ! those loop forms are kept exactly as written.
  integer :: a(3,3,3), k, kk, kkk, l, ll, lll

  ! Nest 1: label-terminated outer loop, named middle loop.
  !$acc kernels
  !$acc loop collapse(3)
    do 115 k=1,3
  dokk: do kk=1,3
        do kkk=1,3
          a(k,kk,kkk) = 1
        enddo
      enddo dokk
115   continue
  !$acc end kernels
  if (any (a(1:3,1:3,1:3) /= 1)) call abort

  ! Nest 2: outer loop both named and labelled.
  !$acc kernels
  !$acc loop collapse(3)
  dol: do 120 l=1,3
  doll: do ll=1,3
        do lll=1,3
          a(l,ll,lll) = 2
        enddo
      enddo doll
120   end do dol
  !$acc end kernels
  if (any (a(1:3,1:3,1:3) /= 2)) call abort
end program collapse3
|
@ -0,0 +1,41 @@
|
||||
! Test the collapse and reduction loop clauses inside a kernels region.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program collapse4
  ! Runs the same triple loop twice: inside a kernels region with
  ! collapse(3) and an .or. reduction on l (filling a), and on the host
  ! with flag r (filling b).  The flags and the arrays must then agree.
  integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
  logical :: l, r

  l = .false.
  r = .false.
  a(:, :, :) = 0
  b(:, :, :) = 0

  !$acc kernels
  !$acc loop collapse (3) reduction (.or.:l)
  do i = 2, 6
    do j = -2, 4
      do k = 13, 18
        l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
        l = l.or.k.lt.13.or.k.gt.18
        if (.not.l) a(i, j, k) = a(i, j, k) + 1
      end do
    end do
  end do
  !$acc end kernels

  ! Host reference.  It is guarded by its own flag r so that it does not
  ! depend on the device result l.
  do i = 2, 6
    do j = -2, 4
      do k = 13, 18
        r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
        r = r.or.k.lt.13.or.k.gt.18
        if (.not.r) b(i, j, k) = b(i, j, k) + 1
      end do
    end do
  end do

  if (l .neqv. r) call abort

  do i = 2, 6
    do j = -2, 4
      do k = 13, 18
        if (a(i, j, k) .ne. b(i, j, k)) call abort
      end do
    end do
  end do
end program collapse4
|
@ -0,0 +1,42 @@
|
||||
! { dg-do run }
|
||||
! { dg-additional-options "-cpp" }
|
||||
|
||||
#define N (1024 * 512)
|
||||
|
||||
subroutine foo (a, b, c)
  ! Fill a and b on the host, add them element-wise into c inside a
  ! kernels region with an independent loop, then verify c on the host.
  ! N is the preprocessor macro defined at the top of the file.
  integer, parameter :: n = N
  integer, dimension (n) :: a, b, c
  integer i, ii

  do i = 1, n
    a(i) = i * 2
  end do

  do i = 1, n
    b(i) = i * 4
  end do

  !$acc kernels copyin (a(1:n), b(1:n)) copyout (c(1:n))
  !$acc loop independent
  do ii = 1, n
    c(ii) = a(ii) + b(ii)
  end do
  !$acc end kernels

  do i = 1, n
    if (c(i) /= a(i) + b(i)) then
      call abort
    end if
  end do
end subroutine
|
||||
|
||||
program main
  ! Provides the three work arrays and hands them to foo, which does the
  ! computation and the checking.  N is the file-level macro.
  integer, parameter :: n = N
  integer, dimension (n) :: a, b, c

  call foo (a, b, c)
end program main
|
66
libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-1.f90
Normal file
66
libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-1.f90
Normal file
@ -0,0 +1,66 @@
|
||||
! Exercise the auto, independent and seq loop clauses inside
|
||||
! kernels regions.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program loops
  ! Each section fills a inside a kernels region using a different loop
  ! clause and compares it with the host-built reference b.
  integer, parameter :: n = 20
  integer :: i, a(n), b(n)

  a = 0
  b = 0

  ! COPY, loop auto

  !$acc kernels copy (a)
  !$acc loop auto
  do i = 1, n
    a(i) = i
  end do
  !$acc end kernels

  ! Host reference used by every check below.
  b = [(i, i = 1, n)]

  call check (a, b, n)

  ! COPYOUT, loop independent

  a = 0

  !$acc kernels copyout (a)
  !$acc loop independent
  do i = 1, n
    a(i) = i
  end do
  !$acc end kernels

  call check (a, b, n)

  ! COPYIN, loop seq

  a = 0

  !$acc kernels copyout (a) copyin (b)
  !$acc loop seq
  do i = 1, n
    a(i) = b(i)
  end do
  !$acc end kernels

  call check (a, b, n)

end program loops
|
||||
|
||||
subroutine check (a, b, n)
  ! Abort if the two length-n integer arrays differ anywhere.
  integer :: n, a(n), b(n)
  integer :: i

  if (any (a /= b)) call abort
end subroutine check
|
116
libgomp/testsuite/libgomp.oacc-fortran/kernels-map-1.f90
Normal file
116
libgomp/testsuite/libgomp.oacc-fortran/kernels-map-1.f90
Normal file
@ -0,0 +1,116 @@
|
||||
! Test the copy, copyin, copyout, pcopy, pcopyin, pcopyout, and pcreate
|
||||
! clauses on kernels constructs.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program map
  ! Each section fills a inside a kernels region under a different data
  ! clause and compares it with the host reference b.  The sections that
  ! test a copyin-style clause read b on the device, so the transferred
  ! data is actually used.
  integer, parameter :: n = 20
  integer :: i, a(n), b(n), d(n)

  a(:) = 0
  b(:) = 0

  ! COPY

  !$acc kernels copy (a)
  !$acc loop
  do i = 1, n
    a(i) = i
  end do
  !$acc end kernels

  ! Host reference used by every check below.
  do i = 1, n
    b(i) = i
  end do

  call check (a, b, n)

  ! COPYOUT

  a(:) = 0

  !$acc kernels copyout (a)
  !$acc loop
  do i = 1, n
    a(i) = i
  end do
  !$acc end kernels

  call check (a, b, n)

  ! COPYIN

  a(:) = 0

  !$acc kernels copyout (a) copyin (b)
  !$acc loop
  do i = 1, n
    a(i) = b(i)
  end do
  !$acc end kernels

  call check (a, b, n)

  ! PRESENT_OR_COPY

  ! Reset first so a stale result from the previous section cannot make
  ! this one pass.
  a(:) = 0

  !$acc kernels pcopy (a)
  !$acc loop
  do i = 1, n
    a(i) = i
  end do
  !$acc end kernels

  call check (a, b, n)

  ! PRESENT_OR_COPYOUT

  a(:) = 0

  !$acc kernels pcopyout (a)
  !$acc loop
  do i = 1, n
    a(i) = i
  end do
  !$acc end kernels

  call check (a, b, n)

  ! PRESENT_OR_COPYIN

  a(:) = 0

  !$acc kernels pcopyout (a) pcopyin (b)
  !$acc loop
  do i = 1, n
    a(i) = b(i)
  end do
  !$acc end kernels

  call check (a, b, n)

  ! PRESENT_OR_CREATE

  a(:) = 0

  !$acc kernels pcopyout (a) pcreate (d)
  !$acc loop
  do i = 1, n
    d(i) = i
    a(i) = d(i)
  end do
  !$acc end kernels

  call check (a, b, n)
end program map
|
||||
|
||||
subroutine check (a, b, n)
  ! Abort if the two length-n integer arrays differ anywhere.
  integer :: n, a(n), b(n)
  integer :: i

  if (any (a /= b)) call abort
end subroutine check
|
@ -0,0 +1,36 @@
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! a, b and c are created on the device with enter data, filled by
  ! kernels/parallel regions that name them in present clauses, and
  ! copied back with exit data before the host check.
  implicit none
  integer, parameter :: n = 1024
  integer, dimension (0:n-1) :: a, b, c
  integer :: i, ii

  !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1))

  !$acc kernels present (a(0:n-1))
  do i = 0, n - 1
    a(i) = i * 2
  end do
  !$acc end kernels

  !$acc parallel present (b(0:n-1))
  !$acc loop
  do i = 0, n -1
    b(i) = i * 4
  end do
  !$acc end parallel

  !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
  do ii = 0, n - 1
    c(ii) = a(ii) + b(ii)
  end do
  !$acc end kernels

  !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1))

  if (any (c /= a + b)) call abort

end program main
|
@ -0,0 +1,23 @@
|
||||
! Test of gang-private variables declared on loop directive.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! x is private on the gang loop; each arr(i) starts as i and gets 2*i
  ! added, so it must end as 3*i.
  integer :: x, i, arr(32)

  arr = [(i, i = 1, 32)]

  !$acc kernels copy(arr)
  !$acc loop gang(num:32) private(x)
  do i = 1, 32
    x = i * 2
    arr(i) = arr(i) + x
  end do
  !$acc end kernels

  do i = 1, 32
    if (arr(i) /= i * 3) then
      call abort
    end if
  end do
end program main
|
@ -0,0 +1,28 @@
|
||||
! Test of gang-private variables declared on loop directive, with broadcasting
|
||||
! to partitioned workers.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! x is private on the gang loop and read by the inner worker loop.
  ! Only indices 0 .. 32*32-1 of arr are initialised and checked.
  integer :: x, i, j, arr(0:32*32)

  do i = 0, 32 * 32 - 1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32) private(x)
  do i = 0, 31
    x = i * 2

    !$acc loop worker(num:32)
    do j = 0, 31
      arr(i * 32 + j) = arr(i * 32 + j) + x
    end do
  end do
  !$acc end kernels

  ! Element i belongs to outer iteration i / 32, which added 2 * (i / 32).
  do i = 0, 32 * 32 - 1
    if (arr(i) /= i + (i / 32) * 2) then
      call abort
    end if
  end do
end program main
|
@ -0,0 +1,28 @@
|
||||
! Test of gang-private variables declared on loop directive, with broadcasting
|
||||
! to partitioned vectors.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! x is private on the gang loop and read by the inner vector loop.
  ! Only indices 0 .. 32*32-1 of arr are initialised and checked.
  integer :: x, i, j, arr(0:32*32)

  do i = 0, 32 * 32 - 1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32) private(x)
  do i = 0, 31
    x = i * 2

    !$acc loop vector(length:32)
    do j = 0, 31
      arr(i * 32 + j) = arr(i * 32 + j) + x
    end do
  end do
  !$acc end kernels

  ! Element i belongs to outer iteration i / 32, which added 2 * (i / 32).
  do i = 0, 32 * 32 - 1
    if (arr(i) /= i + (i / 32) * 2) then
      call abort
    end if
  end do
end program main
|
@ -0,0 +1,36 @@
|
||||
! Test of gang-private addressable variable declared on loop directive, with
|
||||
! broadcasting to partitioned vectors.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! A derived-type variable pt is private on the gang loop and read by
  ! the inner vector loop.  The four components used sum to
  ! i + 2i + 4i + 6i = 13i for outer iteration i.
  type vec3
    integer x, y, z, attr(13)
  end type vec3

  integer x, i, j, arr(0:32*32)
  type(vec3) pt

  do i = 0, 32 * 32 - 1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32) private(pt)
  do i = 0, 31
    pt%x = i
    pt%y = i * 2
    pt%z = i * 4
    pt%attr(5) = i * 6

    !$acc loop vector(length:32)
    do j = 0, 31
      arr(i * 32 + j) = arr(i * 32 + j) + pt%x + pt%y + pt%z + pt%attr(5)
    end do
  end do
  !$acc end kernels

  do i = 0, 32 * 32 - 1
    if (arr(i) /= i + (i / 32) * 13) then
      call abort
    end if
  end do
end program main
|
@ -0,0 +1,41 @@
|
||||
! Test of vector-private variables declared on loop directive.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! Two consecutive vector loops inside a gang/worker nest, each with
  ! its own private x.  Every element gets (ieor term + ior term) * k
  ! added to its initial value.
  integer :: x, i, j, k, idx, arr(0:32*32*32)

  do i = 0, 32 * 32 * 32 - 1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32)
  do i = 0, 31
    !$acc loop worker(num:8)
    do j = 0, 31
      !$acc loop vector(length:32) private(x)
      do k = 0, 31
        x = ieor(i, j * 3)
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
      end do
      !$acc loop vector(length:32) private(x)
      do k = 0, 31
        x = ior(i, j * 5)
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
      end do
    end do
  end do
  !$acc end kernels

  do i = 0, 32 - 1
    do j = 0, 32 - 1
      do k = 0, 32 - 1
        idx = i * 1024 + j * 32 + k
        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) call abort
      end do
    end do
  end do
end program main
|
@ -0,0 +1,38 @@
|
||||
! Test of vector-private variables declared on loop directive. Array type.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! The two-element array pt is private on the vector loop.  Every
  ! element of arr gets (pt(1) + pt(2)) * k added to its initial value.
  ! Only indices 0 .. 32*32*32-1 of arr are initialised and checked.
  integer :: i, j, k, idx, arr(0:32*32*32), pt(2)

  do i = 0, 32*32*32-1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32)
  do i = 0, 31
    !$acc loop worker(num:8)
    do j = 0, 31
      ! Only pt is privatised: the clause used to list x as well, which
      ! this program neither declares nor uses.
      !$acc loop vector(length:32) private(pt)
      do k = 0, 31
        pt(1) = ieor(i, j * 3)
        pt(2) = ior(i, j * 5)
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
      end do
    end do
  end do
  !$acc end kernels

  do i = 0, 32 - 1
    do j = 0, 32 -1
      do k = 0, 32 - 1
        idx = i * 1024 + j * 32 + k
        if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
          call abort
        end if
      end do
    end do
  end do
end program main
|
@ -0,0 +1,27 @@
|
||||
! Test of worker-private variables declared on a loop directive.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! x lives in blank common and is listed as private on both the gang
  ! loop and the worker loop.  Element i*32+j gets ieor(i, j*3) added.
  integer :: x, i, j, arr(0:32*32)
  common x

  do i = 0, 32 * 32 - 1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32) private(x)
  do i = 0, 31
    !$acc loop worker(num:8) private(x)
    do j = 0, 31
      x = ieor(i, j * 3)
      arr(i * 32 + j) = arr(i * 32 + j) + x
    end do
  end do
  !$acc end kernels

  ! For flat index i the outer index is i / 32 and the inner mod(i, 32).
  do i = 0, 32 * 32 - 1
    if (arr(i) /= i + ieor(i / 32, mod(i, 32) * 3)) then
      call abort
    end if
  end do
end program main
|
@ -0,0 +1,36 @@
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
|
||||
! to vector-partitioned mode.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! x is private on the worker loop and read by the inner vector loop,
  ! which adds x * k to each element.
  integer :: x, i, j, k, idx, arr(0:32*32*32)

  do i = 0, 32 * 32 * 32 - 1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32)
  do i = 0, 31
    !$acc loop worker(num:8) private(x)
    do j = 0, 31
      x = ieor(i, j * 3)

      !$acc loop vector(length:32)
      do k = 0, 31
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
      end do
    end do
  end do
  !$acc end kernels

  do i = 0, 32 - 1
    do j = 0, 32 - 1
      do k = 0, 32 - 1
        idx = i * 1024 + j * 32 + k
        if (arr(idx) /= idx + ieor(i, j * 3) * k) then
          call abort
        end if
      end do
    end do
  end do
end program main
|
@ -0,0 +1,48 @@
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
|
||||
! to vector-partitioned mode. Back-to-back worker loops.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! Two back-to-back worker loops inside one gang iteration, each with
  ! its own private x that the nested vector loop reads.
  integer :: x, i, j, k, idx, arr(0:32*32*32)

  do i = 0, 32 * 32 * 32 - 1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32)
  do i = 0, 31
    ! First worker loop: adds ieor(i, j*3) * k.
    !$acc loop worker(num:8) private(x)
    do j = 0, 31
      x = ieor(i, j * 3)

      !$acc loop vector(length:32)
      do k = 0, 31
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
      end do
    end do

    ! Second worker loop: adds ior(i, j*5) * k.
    !$acc loop worker(num:8) private(x)
    do j = 0, 31
      x = ior(i, j * 5)

      !$acc loop vector(length:32)
      do k = 0, 31
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
      end do
    end do
  end do
  !$acc end kernels

  do i = 0, 32 - 1
    do j = 0, 32 - 1
      do k = 0, 32 - 1
        idx = i * 1024 + j * 32 + k
        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) call abort
      end do
    end do
  end do
end program main
|
@ -0,0 +1,45 @@
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
|
||||
! to vector-partitioned mode.  Successive vector loops.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! x is private on the worker loop and is reassigned between two
  ! successive vector loops that both read it.
  integer :: x, i, j, k, idx, arr(0:32*32*32)

  do i = 0, 32 * 32 * 32 - 1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32)
  do i = 0, 31
    !$acc loop worker(num:8) private(x)
    do j = 0, 31
      x = ieor(i, j * 3)

      !$acc loop vector(length:32)
      do k = 0, 31
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
      end do

      x = ior(i, j * 5)

      !$acc loop vector(length:32)
      do k = 0, 31
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
      end do
    end do
  end do
  !$acc end kernels

  do i = 0, 32 - 1
    do j = 0, 32 - 1
      do k = 0, 32 - 1
        idx = i * 1024 + j * 32 + k
        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) call abort
      end do
    end do
  end do
end program main
|
@ -0,0 +1,48 @@
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
|
||||
! to vector-partitioned mode. Addressable worker variable.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! x is a target, private on the worker loop together with pointer p.
  ! The second value of x is stored through p, and both vector loops
  ! read x directly.
  integer :: i, j, k, idx, arr(0:32*32*32)
  integer, target :: x
  integer, pointer :: p

  do i = 0, 32 * 32 * 32 - 1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32)
  do i = 0, 31
    !$acc loop worker(num:8) private(x, p)
    do j = 0, 31
      p => x
      x = ieor(i, j * 3)

      !$acc loop vector(length:32)
      do k = 0, 31
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
      end do

      ! Assigns to x through the pointer.
      p = ior(i, j * 5)

      !$acc loop vector(length:32)
      do k = 0, 31
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
      end do
    end do
  end do
  !$acc end kernels

  do i = 0, 32 - 1
    do j = 0, 32 - 1
      do k = 0, 32 - 1
        idx = i * 1024 + j * 32 + k
        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) call abort
      end do
    end do
  end do
end program main
|
@ -0,0 +1,49 @@
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
|
||||
! to vector-partitioned mode. Aggregate worker variable.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! A derived-type variable pt is private on the worker loop; its two
  ! components are read by two successive vector loops.
  type vec2
    integer x, y
  end type vec2

  integer :: i, j, k, idx, arr(0:32*32*32)
  type(vec2) :: pt

  do i = 0, 32 * 32 * 32 - 1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32)
  do i = 0, 31
    !$acc loop worker(num:8) private(pt)
    do j = 0, 31
      pt%x = ieor(i, j * 3)
      pt%y = ior(i, j * 5)

      !$acc loop vector(length:32)
      do k = 0, 31
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%x * k
      end do

      !$acc loop vector(length:32)
      do k = 0, 31
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%y * k
      end do
    end do
  end do
  !$acc end kernels

  do i = 0, 32 - 1
    do j = 0, 32 - 1
      do k = 0, 32 - 1
        idx = i * 1024 + j * 32 + k
        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) call abort
      end do
    end do
  end do
end program main
|
@ -0,0 +1,44 @@
|
||||
! Test of worker-private variables declared on a loop directive, broadcasting
|
||||
! to vector-partitioned mode. Array worker variable.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program main
  ! Worker-private array variable: every iteration of the worker loop fills
  ! its own copy of PT, and both inner vector loops then read it.
  implicit none

  ! 32 gang iterations * 32 worker iterations * 32 vector iterations.
  integer, parameter :: nelem = 32 * 32 * 32

  ! ARR is indexed from 0, so the last valid element is nelem - 1.
  integer :: i, j, k, idx, arr(0:nelem - 1), pt(2)

  do i = 0, nelem - 1
    arr(i) = i
  end do

  !$acc kernels copy(arr)
  !$acc loop gang(num:32)
  do i = 0, 31
    !$acc loop worker(num:8) private(pt)
    do j = 0, 31
      pt(1) = ieor(i, j * 3)
      pt(2) = ior(i, j * 5)

      !$acc loop vector(length:32)
      do k = 0, 31
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
      end do

      !$acc loop vector(length:32)
      do k = 0, 31
        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
      end do
    end do
  end do
  !$acc end kernels

  ! Recompute the expected value of every element on the host.
  do i = 0, 32 - 1
    do j = 0, 32 - 1
      do k = 0, 32 - 1
        idx = i * 1024 + j * 32 + k
        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
          call abort
        end if
      end do
    end do
  end do
end program main
|
@ -0,0 +1,19 @@
|
||||
! Test a simple acc loop reduction inside a kernels region.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program reduction
  ! Sum-reduce a counter over N loop iterations inside a kernels region and
  ! check that it ends up equal to the iteration count.
  implicit none

  integer, parameter :: n = 20
  integer :: i, red

  red = 0

  !$acc kernels
  !$acc loop reduction (+:red)
  do i = 1, n
    red = red + 1
  end do
  !$acc end kernels

  if (red /= n) call abort
end program reduction
|
27
libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90
Normal file
27
libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90
Normal file
@ -0,0 +1,27 @@
|
||||
! { dg-do run }
|
||||
! { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "-O0" "-O1" } { "" } }
|
||||
|
||||
program main
  ! Launch a parallel region on async queue 0, call acc_wait_async (0, 1),
  ! then query both queues with acc_async_test before waiting on queue 1.
  use openacc
  implicit none

  ! Iteration count of the offloaded loop (named constant, as in lib-13).
  integer, parameter :: n = 1000000
  integer :: i, j

  j = 0

  !$acc parallel async (0) copy (j)
  do i = 1, n
    j = j + 1
  end do
  !$acc end parallel

  call acc_wait_async (0, 1)

  if (acc_async_test (0) .neqv. .TRUE.) call abort

  if (acc_async_test (1) .neqv. .TRUE.) call abort

  call acc_wait (1)

end program main
|
34
libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
Normal file
34
libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
Normal file
@ -0,0 +1,34 @@
|
||||
! { dg-do run }
|
||||
! { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "-O0" "-O1" } { "" } }
|
||||
|
||||
program main
  ! Launch one parallel region per async queue 1..nprocs inside a data
  ! region, then exercise acc_async_test, acc_wait_all_async and acc_wait_all.
  use openacc
  implicit none

  integer, parameter :: N = 1000000
  integer, parameter :: nprocs = 2
  integer :: k(nprocs)
  integer :: i, j

  k = 0

  !$acc data copy (k(1:nprocs))
  do j = 1, nprocs
    !$acc parallel async (j)
    do i = 1, N
      k(j) = k(j) + 1
    end do
    !$acc end parallel
  end do
  !$acc end data

  ! Both per-region queues must report completion.
  if (.not. acc_async_test (1)) call abort
  if (.not. acc_async_test (2)) call abort

  call acc_wait_all_async (nprocs + 1)

  if (.not. acc_async_test (nprocs + 1)) call abort

  call acc_wait_all ()

end program main
|
82
libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90
Normal file
82
libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90
Normal file
@ -0,0 +1,82 @@
|
||||
! Exercise the data movement runtime library functions on non-shared memory
|
||||
! targets.
|
||||
|
||||
! { dg-do run { target openacc_nvidia_accel_selected } }
|
||||
|
||||
program main
  ! Walk an allocatable array through the copyin / create / copyout / delete
  ! runtime routines (including their present_or_ and p-prefixed spellings),
  ! checking acc_is_present and the host contents after each step.
  use openacc
  implicit none

  integer, parameter :: N = 256
  integer, allocatable :: h(:)
  integer :: i

  allocate (h(N))

  do i = 1, N
    h(i) = i
  end do

  ! present_or_copyin followed by copyout: host data must be unchanged.
  call acc_present_or_copyin (h)

  if (acc_is_present (h) .neqv. .TRUE.) call abort

  call acc_copyout (h)

  if (acc_is_present (h) .neqv. .FALSE.) call abort

  do i = 1, N
    if (h(i) /= i) call abort
  end do

  do i = 1, N
    h(i) = i + i
  end do

  ! Same round trip through the short alias, with an explicit byte length.
  call acc_pcopyin (h, sizeof (h))

  if (acc_is_present (h) .neqv. .TRUE.) call abort

  call acc_copyout (h)

  if (acc_is_present (h) .neqv. .FALSE.) call abort

  do i = 1, N
    if (h(i) /= i + i) call abort
  end do

  ! create, fill inside a compute region, then copy the result back.
  call acc_create (h)

  if (acc_is_present (h) .neqv. .TRUE.) call abort

  !$acc parallel loop
  do i = 1, N
    h(i) = i
  end do
  !$acc end parallel loop

  call acc_copyout (h)

  if (acc_is_present (h) .neqv. .FALSE.) call abort

  do i = 1, N
    if (h(i) /= i) call abort
  end do

  ! present_or_create / pcreate paired with delete.
  call acc_present_or_create (h, sizeof (h))

  if (acc_is_present (h) .neqv. .TRUE.) call abort

  call acc_delete (h)

  if (acc_is_present (h) .neqv. .FALSE.) call abort

  call acc_pcreate (h)

  if (acc_is_present (h) .neqv. .TRUE.) call abort

  call acc_delete (h)

  if (acc_is_present (h) .neqv. .FALSE.) call abort

end program main
|
52
libgomp/testsuite/libgomp.oacc-fortran/lib-15.f90
Normal file
52
libgomp/testsuite/libgomp.oacc-fortran/lib-15.f90
Normal file
@ -0,0 +1,52 @@
|
||||
! { dg-do run }
|
||||
! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
|
||||
|
||||
program main
  ! Exercise acc_update_device and acc_update_self on an allocatable array,
  ! overwriting the host copy in between and checking the values that come
  ! back.
  use openacc
  implicit none

  integer, parameter :: N = 256
  integer, allocatable :: h(:)
  integer :: i

  allocate (h(N))

  h(:) = [(i, i = 1, N)]

  call acc_copyin (h)

  ! Change the host values, then push them with update_device.
  h(:) = [(i + i, i = 1, N)]

  call acc_update_device (h, sizeof (h))

  if (.not. acc_is_present (h)) call abort

  ! Zero the host copy; copyout must bring the doubled values back.
  h(:) = 0

  call acc_copyout (h, sizeof (h))

  if (any (h /= [(i + i, i = 1, N)])) call abort

  call acc_copyin (h, sizeof (h))

  ! Zero the host copy again; update_self must restore the doubled values.
  h(:) = 0

  call acc_update_self (h, sizeof (h))

  if (.not. acc_is_present (h)) call abort

  if (any (h /= [(i + i, i = 1, N)])) call abort

  call acc_delete (h)

  if (acc_is_present (h)) call abort

end program main
|
77
libgomp/testsuite/libgomp.oacc-fortran/parallel-loop-1.f90
Normal file
77
libgomp/testsuite/libgomp.oacc-fortran/parallel-loop-1.f90
Normal file
@ -0,0 +1,77 @@
|
||||
! Exercise the auto, independent, seq and tile loop clauses inside
|
||||
! parallel regions.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
program loops
  ! Fill A inside a parallel region four times, each time with a different
  ! loop clause (auto, independent, seq, tile) and data clause, and compare
  ! it with the host-computed reference B through the external CHECK routine.
  implicit none

  integer, parameter :: n = 20
  integer :: i, a(n), b(n)

  a(:) = 0
  b(:) = 0

  ! COPY

  !$acc parallel copy (a)
  !$acc loop auto
  do i = 1, n
    a(i) = i
  end do
  !$acc end parallel

  ! Reference values, computed on the host once and reused below.
  do i = 1, n
    b(i) = i
  end do

  call check (a, b, n)

  ! COPYOUT

  a(:) = 0

  !$acc parallel copyout (a)
  !$acc loop independent
  do i = 1, n
    a(i) = i
  end do
  !$acc end parallel

  call check (a, b, n)

  ! COPYIN

  a(:) = 0

  !$acc parallel copyout (a) copyin (b)
  !$acc loop seq
  do i = 1, n
    a(i) = i
  end do
  !$acc end parallel

  call check (a, b, n)

  ! PRESENT_OR_COPY

  !$acc parallel pcopy (a)
  !$acc loop tile (*)
  do i = 1, n
    a(i) = i
  end do
  !$acc end parallel

  call check (a, b, n)

end program loops
|
||||
|
||||
! Abort the program if the first N elements of A and B differ anywhere.
subroutine check (a, b, n)
  integer :: n
  integer :: a(n), b(n)

  ! A zero-sized comparison yields .false., so n = 0 never aborts.
  if (any (a /= b)) call abort
end subroutine check
|
@ -0,0 +1,38 @@
|
||||
! Test reductions on dummy arguments inside modules.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
module prm
  implicit none

contains

  ! Add 10 * 100 = 1000 to VAR using a gang loop with a nested vector loop,
  ! both carrying a sum reduction on the dummy argument itself.
  subroutine param_reduction(var)
    integer(kind=8), intent(inout) :: var
    integer :: j, k

    !$acc parallel copy(var)
    !$acc loop reduction(+ : var) gang
    do k = 1, 10
      !$acc loop vector reduction(+ : var)
      do j = 1, 100
        ! Integer increment: a real literal here would push the 64-bit
        ! accumulator through default-real arithmetic and lose exactness
        ! for large values.
        var = var + 1_8
      end do
    end do
    !$acc end parallel
  end subroutine param_reduction

end module prm
|
||||
|
||||
program test
  ! Start from 10, let param_reduction add its 1000 increments, and check
  ! that the total is 1010.
  use prm
  implicit none

  integer(8) :: r

  r = 10_8
  call param_reduction (r)

  if (r /= 1010_8) call abort ()
end program test
|
41
libgomp/testsuite/libgomp.oacc-fortran/vector-routine.f90
Normal file
41
libgomp/testsuite/libgomp.oacc-fortran/vector-routine.f90
Normal file
@ -0,0 +1,41 @@
|
||||
! { dg-do run }
|
||||
|
||||
! Holds the array extent shared by the main program and its routine.
module param
  integer, parameter, public :: N = 32
end module param
|
||||
|
||||
program main
  ! Call a vector-level OpenACC routine from a worker loop; the routine
  ! zeroes the whole array, which the host verifies afterwards.
  use param
  implicit none

  integer :: i
  integer :: a(N)

  do i = 1, N
    a(i) = i
  end do

  !$acc parallel copy (a)
  !$acc loop worker
  do i = 1, N
    call vector (a)
  end do
  !$acc end parallel

  do i = 1, N
    if (a(i) /= 0) call abort
  end do

contains

  ! Subtract every element of A from itself, leaving A all zero.
  subroutine vector (a)
    !$acc routine vector
    integer, intent (inout) :: a(N)
    integer :: i

    !$acc loop vector
    do i = 1, N
      a(i) = a(i) - a(i)
    end do

  end subroutine vector

end program main
|
Loading…
x
Reference in New Issue
Block a user