openmp: allocate clause vs. *reduction array sections [PR97670]

This patch finds the base expression of reduction array sections and uses it
in the checks for whether the allocate clause lists only variables that have
been privatized on the same construct.  It also fixes a pasto that caused an ICE.
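
For reference, a minimal example of the construct involved, distilled from the
new c-c++-common/gomp/allocate-4.c test added below:

  void
  foo (void)
  {
    int s[4] = { 0, 0, 0, 0 };
    /* The reduction over the array section privatizes 's', so naming 's'
       in the allocate clause must be accepted; the allocate check needs
       the underlying decl of the section to see that.  */
  #pragma omp parallel reduction (+: s[0:3]) allocate(s)
    s[0]++;
  }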

2020-11-04  Jakub Jelinek  <jakub@redhat.com>

	PR c++/97670
gcc/c-family/
	* c-omp.c (c_omp_split_clauses): Look through array reductions to find
	underlying decl to clear in the allocate_head bitmap.
gcc/c/
	* c-typeck.c (c_finish_omp_clauses): Look through array reductions to
	find underlying decl to clear in the aligned_head bitmap.
gcc/cp/
	* semantics.c (finish_omp_clauses): Look through array reductions to
	find underlying decl to clear in the aligned_head bitmap.  Use
	DECL_UID (t) instead of DECL_UID (OMP_CLAUSE_DECL (c)) when clearing
	in the bitmap.  Only diagnose errors about allocate vars not being
	privatized on the same construct on allocate clause if it has
	a DECL_P OMP_CLAUSE_DECL.
gcc/testsuite/
	* c-c++-common/gomp/allocate-4.c: New test.
	* g++.dg/gomp/allocate-2.C: New test.
	* g++.dg/gomp/allocate-3.C: New test.

gcc/c-family/c-omp.c

@@ -2289,13 +2289,36 @@ c_omp_split_clauses (location_t loc, enum tree_code code,
for (c = cclauses[i]; c; c = OMP_CLAUSE_CHAIN (c))
switch (OMP_CLAUSE_CODE (c))
{
case OMP_CLAUSE_REDUCTION:
case OMP_CLAUSE_IN_REDUCTION:
case OMP_CLAUSE_TASK_REDUCTION:
if (TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF)
{
tree t = TREE_OPERAND (OMP_CLAUSE_DECL (c), 0);
if (TREE_CODE (t) == POINTER_PLUS_EXPR)
t = TREE_OPERAND (t, 0);
if (TREE_CODE (t) == ADDR_EXPR
|| TREE_CODE (t) == INDIRECT_REF)
t = TREE_OPERAND (t, 0);
if (DECL_P (t))
bitmap_clear_bit (&allocate_head, DECL_UID (t));
break;
}
else if (TREE_CODE (OMP_CLAUSE_DECL (c)) == TREE_LIST)
{
tree t;
for (t = OMP_CLAUSE_DECL (c);
TREE_CODE (t) == TREE_LIST; t = TREE_CHAIN (t))
;
if (DECL_P (t))
bitmap_clear_bit (&allocate_head, DECL_UID (t));
break;
}
/* FALLTHRU */
case OMP_CLAUSE_PRIVATE:
case OMP_CLAUSE_FIRSTPRIVATE:
case OMP_CLAUSE_LASTPRIVATE:
case OMP_CLAUSE_LINEAR:
case OMP_CLAUSE_REDUCTION:
case OMP_CLAUSE_IN_REDUCTION:
case OMP_CLAUSE_TASK_REDUCTION:
if (DECL_P (OMP_CLAUSE_DECL (c)))
bitmap_clear_bit (&allocate_head,
DECL_UID (OMP_CLAUSE_DECL (c)));
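
The same base-decl lookup is repeated in each front end touched below.  As a
rough standalone sketch of that logic (hypothetical helper, not part of the
patch; GCC keeps the code inline in each switch and relies on the usual
tree.h macros):

  /* Recover the underlying DECL of a reduction clause.  For an array
     section such as s[0:3] the clause decl is a MEM_REF whose address
     operand may be wrapped in POINTER_PLUS_EXPR and ADDR_EXPR or
     INDIRECT_REF; an unlowered section in the C/C++ front ends can
     still be a TREE_LIST whose tail is the base expression.  */
  static tree
  reduction_base_decl (tree t)
  {
    if (TREE_CODE (t) == MEM_REF)
      {
        t = TREE_OPERAND (t, 0);
        if (TREE_CODE (t) == POINTER_PLUS_EXPR)
          t = TREE_OPERAND (t, 0);
        if (TREE_CODE (t) == ADDR_EXPR || TREE_CODE (t) == INDIRECT_REF)
          t = TREE_OPERAND (t, 0);
      }
    else
      while (TREE_CODE (t) == TREE_LIST)
        t = TREE_CHAIN (t);
    return DECL_P (t) ? t : NULL_TREE;
  }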

gcc/c/c-typeck.c

@@ -15153,13 +15153,26 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
if (allocate_seen)
switch (OMP_CLAUSE_CODE (c))
{
case OMP_CLAUSE_REDUCTION:
case OMP_CLAUSE_IN_REDUCTION:
case OMP_CLAUSE_TASK_REDUCTION:
if (TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF)
{
t = TREE_OPERAND (OMP_CLAUSE_DECL (c), 0);
if (TREE_CODE (t) == POINTER_PLUS_EXPR)
t = TREE_OPERAND (t, 0);
if (TREE_CODE (t) == ADDR_EXPR
|| TREE_CODE (t) == INDIRECT_REF)
t = TREE_OPERAND (t, 0);
if (DECL_P (t))
bitmap_clear_bit (&aligned_head, DECL_UID (t));
break;
}
/* FALLTHRU */
case OMP_CLAUSE_PRIVATE:
case OMP_CLAUSE_FIRSTPRIVATE:
case OMP_CLAUSE_LASTPRIVATE:
case OMP_CLAUSE_LINEAR:
case OMP_CLAUSE_REDUCTION:
case OMP_CLAUSE_IN_REDUCTION:
case OMP_CLAUSE_TASK_REDUCTION:
if (DECL_P (OMP_CLAUSE_DECL (c)))
bitmap_clear_bit (&aligned_head,
DECL_UID (OMP_CLAUSE_DECL (c)));

gcc/cp/semantics.c

@@ -8190,17 +8190,11 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
}
t = OMP_CLAUSE_DECL (c);
if (processing_template_decl
&& !VAR_P (t) && TREE_CODE (t) != PARM_DECL)
{
pc = &OMP_CLAUSE_CHAIN (c);
continue;
}
switch (c_kind)
{
case OMP_CLAUSE_LASTPRIVATE:
if (!bitmap_bit_p (&firstprivate_head, DECL_UID (t)))
if (DECL_P (t)
&& !bitmap_bit_p (&firstprivate_head, DECL_UID (t)))
{
need_default_ctor = true;
need_dtor = true;
@@ -8210,6 +8204,34 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
case OMP_CLAUSE_REDUCTION:
case OMP_CLAUSE_IN_REDUCTION:
case OMP_CLAUSE_TASK_REDUCTION:
if (allocate_seen)
{
if (TREE_CODE (t) == MEM_REF)
{
t = TREE_OPERAND (t, 0);
if (TREE_CODE (t) == POINTER_PLUS_EXPR)
t = TREE_OPERAND (t, 0);
if (TREE_CODE (t) == ADDR_EXPR
|| TREE_CODE (t) == INDIRECT_REF)
t = TREE_OPERAND (t, 0);
if (DECL_P (t))
bitmap_clear_bit (&aligned_head, DECL_UID (t));
}
else if (TREE_CODE (t) == TREE_LIST)
{
while (TREE_CODE (t) == TREE_LIST)
t = TREE_CHAIN (t);
if (DECL_P (t))
bitmap_clear_bit (&aligned_head, DECL_UID (t));
t = OMP_CLAUSE_DECL (c);
}
else if (DECL_P (t))
bitmap_clear_bit (&aligned_head, DECL_UID (t));
t = OMP_CLAUSE_DECL (c);
}
if (processing_template_decl
&& !VAR_P (t) && TREE_CODE (t) != PARM_DECL)
break;
if (finish_omp_reduction_clause (c, &need_default_ctor,
&need_dtor))
remove = true;
@@ -8218,6 +8240,9 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
break;
case OMP_CLAUSE_COPYIN:
if (processing_template_decl
&& !VAR_P (t) && TREE_CODE (t) != PARM_DECL)
break;
if (!VAR_P (t) || !CP_DECL_THREAD_LOCAL_P (t))
{
error_at (OMP_CLAUSE_LOCATION (c),
@@ -8230,6 +8255,13 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
break;
}
if (processing_template_decl
&& !VAR_P (t) && TREE_CODE (t) != PARM_DECL)
{
pc = &OMP_CLAUSE_CHAIN (c);
continue;
}
if (need_complete_type || need_copy_assignment)
{
t = require_complete_type (t);
@@ -8247,8 +8279,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
if (allocate_seen
&& OMP_CLAUSE_CODE (c) != OMP_CLAUSE_SHARED
&& DECL_P (t))
bitmap_clear_bit (&aligned_head,
DECL_UID (OMP_CLAUSE_DECL (c)));
bitmap_clear_bit (&aligned_head, DECL_UID (t));
if (VAR_P (t) && CP_DECL_THREAD_LOCAL_P (t))
share_name = "threadprivate";
@@ -8349,6 +8380,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
bool remove = false;
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ALLOCATE
&& !OMP_CLAUSE_ALLOCATE_COMBINED (c)
&& DECL_P (OMP_CLAUSE_DECL (c))
&& bitmap_bit_p (&aligned_head, DECL_UID (OMP_CLAUSE_DECL (c))))
{
error_at (OMP_CLAUSE_LOCATION (c),

gcc/testsuite/c-c++-common/gomp/allocate-4.c

@@ -0,0 +1,39 @@
void
foo (void)
{
int s[4] = { 0, 0, 0, 0 };
int *p = s;
#pragma omp parallel reduction (+: s) allocate(s)
s[0]++;
#pragma omp parallel reduction (+: s[0:3]) allocate(s)
s[0]++;
#pragma omp parallel reduction (+: s[2:2]) allocate(s)
s[2]++;
#pragma omp parallel reduction (+: p[:2]) allocate(p)
p[0]++;
#pragma omp parallel reduction (+: p[2:2]) allocate(p)
p[2]++;
}
void
bar (void)
{
int s[4] = { 0, 0, 0, 0 };
int *p = s;
int i;
#pragma omp teams distribute parallel for reduction (+: s) allocate(s)
for (i = 0; i < 64; i++)
s[0]++;
#pragma omp teams distribute parallel for reduction (+: s[0:3]) allocate(s)
for (i = 0; i < 64; i++)
s[0]++;
#pragma omp teams distribute parallel for reduction (+: s[2:2]) allocate(s)
for (i = 0; i < 64; i++)
s[2]++;
#pragma omp teams distribute parallel for reduction (+: p[:2]) allocate(p)
for (i = 0; i < 64; i++)
p[0]++;
#pragma omp teams distribute parallel for reduction (+: p[2:2]) allocate(p)
for (i = 0; i < 64; i++)
p[2]++;
}

gcc/testsuite/g++.dg/gomp/allocate-2.C

@@ -0,0 +1,11 @@
// PR c++/97670
struct S { int s; };
void
foo ()
{
S s[1] = { S () };
#pragma omp parallel reduction (+: s) allocate(s) // { dg-error "user defined reduction not found for 's'" }
s[0].s++;
}

gcc/testsuite/g++.dg/gomp/allocate-3.C

@@ -0,0 +1,206 @@
template <typename T>
void
foo (T &x, T (&y)[4], T *&z, int &u, int (&v)[4], int *&w)
{
T s[4] = { 0, 0, 0, 0 };
T *p = s;
#pragma omp parallel reduction (+: s) allocate(s)
s[0]++;
#pragma omp parallel reduction (+: s[0:3]) allocate(s)
s[0]++;
#pragma omp parallel reduction (+: s[2:2]) allocate(s)
s[2]++;
#pragma omp parallel reduction (+: p[:2]) allocate(p)
p[0]++;
#pragma omp parallel reduction (+: p[2:2]) allocate(p)
p[2]++;
int s2[4] = { 0, 0, 0, 0 };
int *p2 = s2;
#pragma omp parallel reduction (+: s2) allocate(s2)
s2[0]++;
#pragma omp parallel reduction (+: s2[0:3]) allocate(s2)
s2[0]++;
#pragma omp parallel reduction (+: s2[2:2]) allocate(s2)
s2[2]++;
#pragma omp parallel reduction (+: p2[:2]) allocate(p2)
p2[0]++;
#pragma omp parallel reduction (+: p2[2:2]) allocate(p2)
p2[2]++;
#pragma omp parallel reduction (+: x) allocate(x)
x++;
#pragma omp parallel reduction (+: y) allocate(y)
y[0]++;
#pragma omp parallel reduction (+: y[0:3]) allocate(y)
y[0]++;
#pragma omp parallel reduction (+: y[2:2]) allocate(y)
y[2]++;
#pragma omp parallel reduction (+: z[:2]) allocate(z)
z[0]++;
#pragma omp parallel reduction (+: z[2:2]) allocate(z)
z[2]++;
#pragma omp parallel reduction (+: u) allocate(u)
u++;
#pragma omp parallel reduction (+: v) allocate(v)
v[0]++;
#pragma omp parallel reduction (+: v[0:3]) allocate(v)
v[0]++;
#pragma omp parallel reduction (+: v[2:2]) allocate(v)
v[2]++;
#pragma omp parallel reduction (+: w[:2]) allocate(w)
w[0]++;
#pragma omp parallel reduction (+: w[2:2]) allocate(w)
w[2]++;
}
template <typename T>
void
bar (T &x, T (&y)[4], T *&z, int &u, int (&v)[4], int *&w)
{
T s[4] = { 0, 0, 0, 0 };
T *p = s;
int i;
#pragma omp teams distribute parallel for reduction (+: s) allocate(s)
for (i = 0; i < 64; i++)
s[0]++;
#pragma omp teams distribute parallel for reduction (+: s[0:3]) allocate(s)
for (i = 0; i < 64; i++)
s[0]++;
#pragma omp teams distribute parallel for reduction (+: s[2:2]) allocate(s)
for (i = 0; i < 64; i++)
s[2]++;
#pragma omp teams distribute parallel for reduction (+: p[:2]) allocate(p)
for (i = 0; i < 64; i++)
p[0]++;
#pragma omp teams distribute parallel for reduction (+: p[2:2]) allocate(p)
for (i = 0; i < 64; i++)
p[2]++;
int s2[4] = { 0, 0, 0, 0 };
int *p2 = s2;
#pragma omp teams distribute parallel for reduction (+: s2) allocate(s2)
for (i = 0; i < 64; i++)
s2[0]++;
#pragma omp teams distribute parallel for reduction (+: s2[0:3]) allocate(s2)
for (i = 0; i < 64; i++)
s2[0]++;
#pragma omp teams distribute parallel for reduction (+: s2[2:2]) allocate(s2)
for (i = 0; i < 64; i++)
s2[2]++;
#pragma omp teams distribute parallel for reduction (+: p2[:2]) allocate(p2)
for (i = 0; i < 64; i++)
p2[0]++;
#pragma omp teams distribute parallel for reduction (+: p2[2:2]) allocate(p2)
for (i = 0; i < 64; i++)
p2[2]++;
#pragma omp teams distribute parallel for reduction (+: x) allocate(x)
for (i = 0; i < 64; i++)
x++;
#pragma omp teams distribute parallel for reduction (+: y) allocate(y)
for (i = 0; i < 64; i++)
y[0]++;
#pragma omp teams distribute parallel for reduction (+: y[0:3]) allocate(y)
for (i = 0; i < 64; i++)
y[0]++;
#pragma omp teams distribute parallel for reduction (+: y[2:2]) allocate(y)
for (i = 0; i < 64; i++)
y[2]++;
#pragma omp teams distribute parallel for reduction (+: z[:2]) allocate(z)
for (i = 0; i < 64; i++)
z[0]++;
#pragma omp teams distribute parallel for reduction (+: z[2:2]) allocate(z)
for (i = 0; i < 64; i++)
z[2]++;
#pragma omp teams distribute parallel for reduction (+: u) allocate(u)
for (i = 0; i < 64; i++)
u++;
#pragma omp teams distribute parallel for reduction (+: v) allocate(v)
for (i = 0; i < 64; i++)
v[0]++;
#pragma omp teams distribute parallel for reduction (+: v[0:3]) allocate(v)
for (i = 0; i < 64; i++)
v[0]++;
#pragma omp teams distribute parallel for reduction (+: v[2:2]) allocate(v)
for (i = 0; i < 64; i++)
v[2]++;
#pragma omp teams distribute parallel for reduction (+: w[:2]) allocate(w)
for (i = 0; i < 64; i++)
w[0]++;
#pragma omp teams distribute parallel for reduction (+: w[2:2]) allocate(w)
for (i = 0; i < 64; i++)
w[2]++;
}
void
baz (long int &x, long int (&y)[4], long int *&z)
{
#pragma omp parallel reduction (+: x) allocate(x)
x++;
#pragma omp parallel reduction (+: y) allocate(y)
y[0]++;
#pragma omp parallel reduction (+: y[0:3]) allocate(y)
y[0]++;
#pragma omp parallel reduction (+: y[2:2]) allocate(y)
y[2]++;
#pragma omp parallel reduction (+: z[:2]) allocate(z)
z[0]++;
#pragma omp parallel reduction (+: z[2:2]) allocate(z)
z[2]++;
}
void
qux (long long int &x, long long int (&y)[4], long long int *&z)
{
int i;
#pragma omp teams distribute parallel for reduction (+: x) allocate(x)
for (i = 0; i < 64; i++)
x++;
#pragma omp teams distribute parallel for reduction (+: y) allocate(y)
for (i = 0; i < 64; i++)
y[0]++;
#pragma omp teams distribute parallel for reduction (+: y[0:3]) allocate(y)
for (i = 0; i < 64; i++)
y[0]++;
#pragma omp teams distribute parallel for reduction (+: y[2:2]) allocate(y)
for (i = 0; i < 64; i++)
y[2]++;
#pragma omp teams distribute parallel for reduction (+: z[:2]) allocate(z)
for (i = 0; i < 64; i++)
z[0]++;
#pragma omp teams distribute parallel for reduction (+: z[2:2]) allocate(z)
for (i = 0; i < 64; i++)
z[2]++;
}
void
test ()
{
long int x = 0;
long int y[4] = { 0, 0, 0, 0 };
long int *z = y;
int u = 0;
int v[4] = { 0, 0, 0, 0 };
int *w = v;
long long int x2 = 0;
long long int y2[4] = { 0, 0, 0, 0 };
long long int *z2 = y2;
foo (x, y, z, u, v, w);
bar (x2, y2, z2, u, v, w);
}
namespace N
{
int a;
void foo ()
{
int i;
#pragma omp parallel firstprivate (N::a) allocate (a)
a++;
#pragma omp parallel firstprivate (a) allocate (N::a)
a++;
#pragma omp teams distribute parallel for firstprivate (N::a) allocate (a)
for (i = 0; i < 64; i++)
a++;
#pragma omp teams distribute parallel for firstprivate (a) allocate (N::a)
for (i = 0; i < 64; i++)
a++;
}
}