mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-09 17:01:06 +08:00
nvptx.c (nvptx_generate_vector_shuffle): Deal with complex types.
gcc/
	* config/nvptx/nvptx.c (nvptx_generate_vector_shuffle): Deal with complex types.

libgomp/
	* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: New.
	* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: New.

From-SVN: r230325
This commit is contained in:
parent
23c64853c8
commit
dd3c1b14af
gcc
libgomp
@ -9,6 +9,11 @@
|
||||
using EDGE_FALSE_VALUE for edges to the call block and
|
||||
EDGE_TRUE_VALUE for the others.
|
||||
|
||||
2015-11-13 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* config/nvptx/nvptx.c (nvptx_generate_vector_shuffle): Deal with
|
||||
complex types.
|
||||
|
||||
2015-11-13 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* gimplify.c (oacc_default_clause): Use inform for enclosing scope.
|
||||
|
@ -3634,26 +3634,51 @@ nvptx_generate_vector_shuffle (location_t loc,
|
||||
{
|
||||
unsigned fn = NVPTX_BUILTIN_SHUFFLE;
|
||||
tree_code code = NOP_EXPR;
|
||||
tree type = unsigned_type_node;
|
||||
enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
|
||||
tree arg_type = unsigned_type_node;
|
||||
tree var_type = TREE_TYPE (var);
|
||||
tree dest_type = var_type;
|
||||
|
||||
if (!INTEGRAL_MODE_P (mode))
|
||||
if (TREE_CODE (var_type) == COMPLEX_TYPE)
|
||||
var_type = TREE_TYPE (var_type);
|
||||
|
||||
if (TREE_CODE (var_type) == REAL_TYPE)
|
||||
code = VIEW_CONVERT_EXPR;
|
||||
if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (DImode))
|
||||
|
||||
if (TYPE_SIZE (var_type)
|
||||
== TYPE_SIZE (long_long_unsigned_type_node))
|
||||
{
|
||||
fn = NVPTX_BUILTIN_SHUFFLELL;
|
||||
type = long_long_unsigned_type_node;
|
||||
arg_type = long_long_unsigned_type_node;
|
||||
}
|
||||
|
||||
tree call = nvptx_builtin_decl (fn, true);
|
||||
tree bits = build_int_cst (unsigned_type_node, shift);
|
||||
tree kind = build_int_cst (unsigned_type_node, SHUFFLE_DOWN);
|
||||
tree expr;
|
||||
|
||||
if (var_type != dest_type)
|
||||
{
|
||||
/* Do real and imaginary parts separately. */
|
||||
tree real = fold_build1 (REALPART_EXPR, var_type, var);
|
||||
real = fold_build1 (code, arg_type, real);
|
||||
real = build_call_expr_loc (loc, call, 3, real, bits, kind);
|
||||
real = fold_build1 (code, var_type, real);
|
||||
|
||||
tree imag = fold_build1 (IMAGPART_EXPR, var_type, var);
|
||||
imag = fold_build1 (code, arg_type, imag);
|
||||
imag = build_call_expr_loc (loc, call, 3, imag, bits, kind);
|
||||
imag = fold_build1 (code, var_type, imag);
|
||||
|
||||
expr = fold_build2 (COMPLEX_EXPR, dest_type, real, imag);
|
||||
}
|
||||
else
|
||||
{
|
||||
expr = fold_build1 (code, arg_type, var);
|
||||
expr = build_call_expr_loc (loc, call, 3, expr, bits, kind);
|
||||
expr = fold_build1 (code, dest_type, expr);
|
||||
}
|
||||
|
||||
tree call = nvptx_builtin_decl (fn, true);
|
||||
call = build_call_expr_loc
|
||||
(loc, call, 3, fold_build1 (code, type, var),
|
||||
build_int_cst (unsigned_type_node, shift),
|
||||
build_int_cst (unsigned_type_node, SHUFFLE_DOWN));
|
||||
|
||||
call = fold_build1 (code, TREE_TYPE (dest_var), call);
|
||||
|
||||
gimplify_assign (dest_var, call, seq);
|
||||
gimplify_assign (dest_var, expr, seq);
|
||||
}
|
||||
|
||||
/* Insert code to locklessly update *PTR with *PTR OP VAR just before
|
||||
|
@ -1,3 +1,8 @@
|
||||
2015-11-13 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: New.
|
||||
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: New.
|
||||
|
||||
2015-11-12 James Norris <jnorris@codesourcery.com>
|
||||
Joseph Myers <joseph@codesourcery.com>
|
||||
|
||||
|
@ -0,0 +1,52 @@
|
||||
|
||||
#include <complex.h>
|
||||
|
||||
/* Double float has 52 stored bits of fraction (53 bits of precision).
   FRAC is the relative tolerance used below: 2^-48.  */
#define FRAC (1.0 / (1LL << 48))

/* Return nonzero if B matches A to within a relative error of FRAC.

   The check is done on squared magnitudes, comparing
   |A - B|^2 / |A|^2 against FRAC^2, so no square root is needed.  */

int close_enough (double _Complex a, double _Complex b)
{
  double _Complex diff = a - b;
  double mag2_a = __real__ (a) * __real__ (a) + __imag__ (a) * __imag__ (a);
  double mag2_diff = (__real__ (diff) * __real__ (diff)
                      + __imag__ (diff) * __imag__ (diff));

  /* A zero reference has no meaningful relative error, and dividing by
     it would give NaN or infinity.  Only an exact match is close.  */
  if (mag2_a == 0)
    return mag2_diff == 0;

  return mag2_diff / mag2_a < (FRAC * FRAC);
}
|
||||
|
||||
/* Fill an array of N double-precision complex values, computing their
   sum and product sequentially as reference results.  Then recompute
   both with an OpenACC vector loop using '+' and '*' reductions and
   compare against the references.

   Returns 0 when both reductions agree with the reference values
   according to close_enough, and 1 otherwise.  */

int main (void)
{
#define N 100
  double _Complex ary[N], sum, prod, tsum, tprod;
  int ix;

  /* Identity elements for the sum and product reductions.  */
  sum = tsum = 0;
  prod = tprod = 1;

  /* Initialize the input and accumulate the reference results.  */
  for (ix = 0; ix < N; ix++)
    {
      double frac = ix * (1.0 / 1024) + 1.0;

      ary[ix] = frac + frac * 2.0i - 1.0i;
      sum += ary[ix];
      prod *= ary[ix];
    }

  /* The same accumulation, expressed as vector-partitioned reductions.  */
#pragma acc parallel vector_length(32) copyin(ary) copy (tsum, tprod)
  {
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
    for (ix = 0; ix < N; ix++)
      {
        tsum += ary[ix];
        tprod *= ary[ix];
      }
  }

  if (!close_enough (sum, tsum))
    return 1;

  if (!close_enough (prod, tprod))
    return 1;

  return 0;
}
|
@ -0,0 +1,52 @@
|
||||
|
||||
#include <complex.h>
|
||||
|
||||
/* Single float has 23 stored bits of fraction (24 bits of precision).
   FRAC is the relative tolerance used below: 2^-20.  */
#define FRAC (1.0f / (1 << 20))

/* Return nonzero if B matches A to within a relative error of FRAC.

   The check is done on squared magnitudes, comparing
   |A - B|^2 / |A|^2 against FRAC^2, so no square root is needed.  */

int close_enough (float _Complex a, float _Complex b)
{
  float _Complex diff = a - b;
  float mag2_a = __real__ (a) * __real__ (a) + __imag__ (a) * __imag__ (a);
  float mag2_diff = (__real__ (diff) * __real__ (diff)
                     + __imag__ (diff) * __imag__ (diff));

  /* A zero reference has no meaningful relative error, and dividing by
     it would give NaN or infinity.  Only an exact match is close.  */
  if (mag2_a == 0)
    return mag2_diff == 0;

  return mag2_diff / mag2_a < (FRAC * FRAC);
}
|
||||
|
||||
/* Fill an array of N single-precision complex values, computing their
   sum and product sequentially as reference results.  Then recompute
   both with an OpenACC vector loop using '+' and '*' reductions and
   compare against the references.

   Returns 0 when both reductions agree with the reference values
   according to close_enough, and 1 otherwise.  */

int main (void)
{
#define N 100
  float _Complex ary[N], sum, prod, tsum, tprod;
  int ix;

  /* Identity elements for the sum and product reductions.  */
  sum = tsum = 0;
  prod = tprod = 1;

  /* Initialize the input and accumulate the reference results.  */
  for (ix = 0; ix < N; ix++)
    {
      float frac = ix * (1.0f / 1024) + 1.0f;

      ary[ix] = frac + frac * 2.0i - 1.0i;
      sum += ary[ix];
      prod *= ary[ix];
    }

  /* The same accumulation, expressed as vector-partitioned reductions.  */
#pragma acc parallel vector_length(32) copyin(ary) copy (tsum, tprod)
  {
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
    for (ix = 0; ix < N; ix++)
      {
        tsum += ary[ix];
        tprod *= ary[ix];
      }
  }

  if (!close_enough (sum, tsum))
    return 1;

  if (!close_enough (prod, tprod))
    return 1;

  return 0;
}
|
Loading…
x
Reference in New Issue
Block a user