mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-19 08:30:28 +08:00
Add v2di support for nvptx
2017-07-19 Tom de Vries <tom@codesourcery.com> * config/nvptx/nvptx-modes.def: Add V2DImode. * config/nvptx/nvptx-protos.h (nvptx_data_alignment): Declare. * config/nvptx/nvptx.c (nvptx_ptx_type_from_mode): Handle V2DImode. (nvptx_output_mov_insn): Handle lack of mov.b128. (nvptx_print_operand): Handle 'H' and 'L' codes. (nvptx_vector_mode_supported): Allow V2DImode. (nvptx_preferred_simd_mode): New function. (nvptx_data_alignment): New function. (TARGET_VECTORIZE_PREFERRED_SIMD_MODE): Redefine to nvptx_preferred_simd_mode. * config/nvptx/nvptx.h (STACK_BOUNDARY, BIGGEST_ALIGNMENT): Change from 64 to 128 bits. (DATA_ALIGNMENT): Define. Set to nvptx_data_alignment. * config/nvptx/nvptx.md (VECIM): Add V2DI. * gcc.target/nvptx/decl-init.c: Update alignment. * gcc.target/nvptx/slp-2-run.c: New test. * gcc.target/nvptx/slp-2.c: New test. * gcc.target/nvptx/v2di.c: New test. * testsuite/libgomp.oacc-c/vec.c: New test. From-SVN: r250341
This commit is contained in:
parent
b98b34b708
commit
3717fbe35e
@ -1,3 +1,23 @@
|
||||
2017-07-19 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* config/nvptx/nvptx.md (VECIM): Add V2DI.
|
||||
|
||||
2017-07-19 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* config/nvptx/nvptx-modes.def: Add V2DImode.
|
||||
* config/nvptx/nvptx-protos.h (nvptx_data_alignment): Declare.
|
||||
* config/nvptx/nvptx.c (nvptx_ptx_type_from_mode): Handle V2DImode.
|
||||
(nvptx_output_mov_insn): Handle lack of mov.b128.
|
||||
(nvptx_print_operand): Handle 'H' and 'L' codes.
|
||||
(nvptx_vector_mode_supported): Allow V2DImode.
|
||||
(nvptx_preferred_simd_mode): New function.
|
||||
(nvptx_data_alignment): New function.
|
||||
(TARGET_VECTORIZE_PREFERRED_SIMD_MODE): Redefine to
|
||||
nvptx_preferred_simd_mode.
|
||||
* config/nvptx/nvptx.h (STACK_BOUNDARY, BIGGEST_ALIGNMENT): Change from
|
||||
64 to 128 bits.
|
||||
(DATA_ALIGNMENT): Define. Set to nvptx_data_alignment.
|
||||
|
||||
2017-07-19 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* config/nvptx/nvptx-modes.def: New file. Add V2SImode.
|
||||
|
@ -1 +1,3 @@
|
||||
VECTOR_MODE (INT, SI, 2); /* V2SI */
|
||||
|
||||
VECTOR_MODE (INT, DI, 2); /* V2DI */
|
||||
|
@ -41,6 +41,7 @@ extern void nvptx_function_end (FILE *);
|
||||
extern void nvptx_output_skip (FILE *, unsigned HOST_WIDE_INT);
|
||||
extern void nvptx_output_ascii (FILE *, const char *, unsigned HOST_WIDE_INT);
|
||||
extern void nvptx_register_pragmas (void);
|
||||
extern unsigned int nvptx_data_alignment (const_tree, unsigned int);
|
||||
|
||||
#ifdef RTX_CODE
|
||||
extern void nvptx_expand_oacc_fork (unsigned);
|
||||
|
@ -238,6 +238,8 @@ nvptx_ptx_type_from_mode (machine_mode mode, bool promote)
|
||||
|
||||
case V2SImode:
|
||||
return ".v2.u32";
|
||||
case V2DImode:
|
||||
return ".v2.u64";
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
@ -2183,7 +2185,20 @@ nvptx_output_mov_insn (rtx dst, rtx src)
|
||||
? "%.\tmov%t0\t%0, %1;" : "%.\tmov.b%T0\t%0, %1;");
|
||||
|
||||
if (GET_MODE_SIZE (dst_inner) == GET_MODE_SIZE (src_inner))
|
||||
return "%.\tmov.b%T0\t%0, %1;";
|
||||
{
|
||||
if (GET_MODE_BITSIZE (dst_mode) == 128
|
||||
&& GET_MODE_BITSIZE (GET_MODE (src)) == 128)
|
||||
{
|
||||
/* mov.b128 is not supported. */
|
||||
if (dst_inner == V2DImode && src_inner == TImode)
|
||||
return "%.\tmov.u64\t%0.x, %L1;\n\t%.\tmov.u64\t%0.y, %H1;";
|
||||
else if (dst_inner == TImode && src_inner == V2DImode)
|
||||
return "%.\tmov.u64\t%L0, %1.x;\n\t%.\tmov.u64\t%H0, %1.y;";
|
||||
|
||||
gcc_unreachable ();
|
||||
}
|
||||
return "%.\tmov.b%T0\t%0, %1;";
|
||||
}
|
||||
|
||||
return "%.\tcvt%t0%t1\t%0, %1;";
|
||||
}
|
||||
@ -2421,6 +2436,20 @@ nvptx_print_operand (FILE *file, rtx x, int code)
|
||||
fprintf (file, "%s", nvptx_ptx_type_from_mode (mode, code == 't'));
|
||||
break;
|
||||
|
||||
case 'H':
|
||||
case 'L':
|
||||
{
|
||||
rtx inner_x = SUBREG_REG (x);
|
||||
machine_mode inner_mode = GET_MODE (inner_x);
|
||||
machine_mode split = maybe_split_mode (inner_mode);
|
||||
|
||||
output_reg (file, REGNO (inner_x), split,
|
||||
(code == 'H'
|
||||
? GET_MODE_SIZE (inner_mode) / 2
|
||||
: 0));
|
||||
}
|
||||
break;
|
||||
|
||||
case 'S':
|
||||
{
|
||||
nvptx_shuffle_kind kind = (nvptx_shuffle_kind) UINTVAL (x);
|
||||
@ -5439,7 +5468,38 @@ nvptx_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED,
|
||||
static bool
|
||||
nvptx_vector_mode_supported (machine_mode mode)
|
||||
{
|
||||
return mode == V2SImode;
|
||||
return (mode == V2SImode
|
||||
|| mode == V2DImode);
|
||||
}
|
||||
|
||||
/* Return the preferred mode for vectorizing scalar MODE. */
|
||||
|
||||
static machine_mode
|
||||
nvptx_preferred_simd_mode (machine_mode mode)
|
||||
{
|
||||
switch (mode)
|
||||
{
|
||||
case DImode:
|
||||
return V2DImode;
|
||||
case SImode:
|
||||
return V2SImode;
|
||||
|
||||
default:
|
||||
return default_preferred_simd_mode (mode);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int
|
||||
nvptx_data_alignment (const_tree type, unsigned int basic_align)
|
||||
{
|
||||
if (TREE_CODE (type) == INTEGER_TYPE)
|
||||
{
|
||||
unsigned HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
|
||||
if (size == GET_MODE_SIZE (TImode))
|
||||
return GET_MODE_BITSIZE (maybe_split_mode (TImode));
|
||||
}
|
||||
|
||||
return basic_align;
|
||||
}
|
||||
|
||||
#undef TARGET_OPTION_OVERRIDE
|
||||
@ -5562,6 +5622,10 @@ nvptx_vector_mode_supported (machine_mode mode)
|
||||
#undef TARGET_VECTOR_MODE_SUPPORTED_P
|
||||
#define TARGET_VECTOR_MODE_SUPPORTED_P nvptx_vector_mode_supported
|
||||
|
||||
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
|
||||
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
|
||||
nvptx_preferred_simd_mode
|
||||
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
#include "gt-nvptx.h"
|
||||
|
@ -52,13 +52,15 @@
|
||||
|
||||
/* Alignments in bits. */
|
||||
#define PARM_BOUNDARY 32
|
||||
#define STACK_BOUNDARY 64
|
||||
#define STACK_BOUNDARY 128
|
||||
#define FUNCTION_BOUNDARY 32
|
||||
#define BIGGEST_ALIGNMENT 64
|
||||
#define BIGGEST_ALIGNMENT 128
|
||||
#define STRICT_ALIGNMENT 1
|
||||
|
||||
#define MAX_STACK_ALIGNMENT (1024 * 8)
|
||||
|
||||
#define DATA_ALIGNMENT nvptx_data_alignment
|
||||
|
||||
/* Copied from elf.h and other places. We'd otherwise use
|
||||
BIGGEST_ALIGNMENT and fail a number of testcases. */
|
||||
#define MAX_OFILE_ALIGNMENT (32768 * 8)
|
||||
|
@ -184,7 +184,7 @@
|
||||
(define_mode_iterator SDCM [SC DC])
|
||||
(define_mode_iterator BITS [SI SF])
|
||||
(define_mode_iterator BITD [DI DF])
|
||||
(define_mode_iterator VECIM [V2SI])
|
||||
(define_mode_iterator VECIM [V2SI V2DI])
|
||||
|
||||
;; This mode iterator allows :P to be used for patterns that operate on
|
||||
;; pointer-sized quantities. Exactly one of the two alternatives will match.
|
||||
|
@ -1,3 +1,10 @@
|
||||
2017-07-19 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* gcc.target/nvptx/decl-init.c: Update alignment.
|
||||
* gcc.target/nvptx/slp-2-run.c: New test.
|
||||
* gcc.target/nvptx/slp-2.c: New test.
|
||||
* gcc.target/nvptx/v2di.c: New test.
|
||||
|
||||
2017-07-19 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* gcc.target/nvptx/slp-run.c: New test.
|
||||
|
@ -37,7 +37,7 @@ struct five five2[2] = {{12, 13}, {14, 15}};
|
||||
/* { dg-final { scan-assembler ".align 1 .u8 five2\\\[10\\\] = { 12, 13, 0, 0, 0, 14, 15, 0, 0, 0 };" } } */
|
||||
|
||||
int __attribute__((vector_size(16))) vi = {16, 17, 18, 19};
|
||||
/* { dg-final { scan-assembler ".align 8 .u32 vi\\\[4\\\] = { 16, 17, 18, 19 };" } } */
|
||||
/* { dg-final { scan-assembler ".align 16 .u32 vi\\\[4\\\] = { 16, 17, 18, 19 };" } } */
|
||||
|
||||
typedef int __attribute ((mode(TI))) ti_t;
|
||||
|
||||
|
23
gcc/testsuite/gcc.target/nvptx/slp-2-run.c
Normal file
23
gcc/testsuite/gcc.target/nvptx/slp-2-run.c
Normal file
@ -0,0 +1,23 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -ftree-slp-vectorize" } */
|
||||
|
||||
#include "slp-2.c"
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
unsigned int i;
|
||||
for (i = 0; i < 1000; i += 1)
|
||||
{
|
||||
p[i] = i;
|
||||
p2[i] = 0;
|
||||
}
|
||||
|
||||
foo ();
|
||||
|
||||
for (i = 0; i < 1000; i += 1)
|
||||
if (p2[i] != i)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
25
gcc/testsuite/gcc.target/nvptx/slp-2.c
Normal file
25
gcc/testsuite/gcc.target/nvptx/slp-2.c
Normal file
@ -0,0 +1,25 @@
|
||||
/* { dg-do assemble } */
|
||||
/* { dg-options "-O2 -ftree-slp-vectorize -save-temps" } */
|
||||
|
||||
long long int p[1000] __attribute__((aligned(16)));
|
||||
long long int p2[1000] __attribute__((aligned(16)));
|
||||
|
||||
void __attribute__((noinline, noclone))
|
||||
foo ()
|
||||
{
|
||||
long long int a, b;
|
||||
|
||||
unsigned int i;
|
||||
for (i = 0; i < 1000; i += 2)
|
||||
{
|
||||
a = p[i];
|
||||
b = p[i+1];
|
||||
|
||||
p2[i] = a;
|
||||
p2[i+1] = b;
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "ld.v2.u64" } } */
|
||||
/* { dg-final { scan-assembler "st.v2.u64" } } */
|
||||
|
12
gcc/testsuite/gcc.target/nvptx/v2di.c
Normal file
12
gcc/testsuite/gcc.target/nvptx/v2di.c
Normal file
@ -0,0 +1,12 @@
|
||||
/* { dg-do assemble } */
|
||||
/* { dg-options "-O2 -save-temps" } */
|
||||
|
||||
typedef long long int __v2di __attribute__((__vector_size__(16)));
|
||||
|
||||
#define TYPE __v2di
|
||||
#include "vec.inc"
|
||||
|
||||
/* { dg-final { scan-assembler ".reg\\.v2\\.u64" } } */
|
||||
/* { dg-final { scan-assembler "ld\\.v2\\.u64" } } */
|
||||
/* { dg-final { scan-assembler "st\\.v2\\.u64" } } */
|
||||
/* { dg-final { scan-assembler "mov\\.v2\\.u64.*\\{ 1, 2 \\}" } } */
|
@ -1,3 +1,7 @@
|
||||
2017-07-19 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* testsuite/libgomp.oacc-c/vec.c: New test.
|
||||
|
||||
2017-07-03 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* plugin/plugin-hsa.c: Fix secure_getenv.h include.
|
||||
|
48
libgomp/testsuite/libgomp.oacc-c/vec.c
Normal file
48
libgomp/testsuite/libgomp.oacc-c/vec.c
Normal file
@ -0,0 +1,48 @@
|
||||
/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
|
||||
/* { dg-additional-options "-std=c99 -ftree-slp-vectorize -foffload=-ftree-slp-vectorize -foffload=-fdump-tree-slp1 -foffload=-save-temps -save-temps" } */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
long long int p[32 *1000] __attribute__((aligned(16)));
|
||||
long long int p2[32 *1000] __attribute__((aligned(16)));
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
#pragma acc parallel num_gangs(1) num_workers(1) vector_length(32)
|
||||
{
|
||||
if (((unsigned long int)p & (0xfULL)) != 0)
|
||||
__builtin_abort ();
|
||||
if (((unsigned long int)p2 & (0xfULL)) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
for (unsigned int k = 0; k < 10000; k += 1)
|
||||
{
|
||||
#pragma acc loop vector
|
||||
for (unsigned long long int j = 0; j < 32; j += 1)
|
||||
{
|
||||
unsigned long long a, b;
|
||||
unsigned long long *p3, *p4;
|
||||
p3 = (unsigned long long *)((unsigned long long int)p & (~0xfULL));
|
||||
p4 = (unsigned long long *)((unsigned long long int)p2 & (~0xfULL));
|
||||
|
||||
for (unsigned int i = 0; i < 1000; i += 2)
|
||||
{
|
||||
a = p3[j * 1000 + i];
|
||||
b = p3[j * 1000 + i + 1];
|
||||
|
||||
p4[j * 1000 + i] = a;
|
||||
p4[j * 1000 + i + 1] = b;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Todo: make a scan-tree-dump variant that scans vec.o instead. */
|
||||
/* { dg-final { file copy -force [glob vec.o.*] [regsub \.o\. [glob vec.o.*] \.c\.] } } */
|
||||
/* { dg-final { scan-tree-dump "vector\\(2\\) long long unsigned int" "slp1" } } */
|
Loading…
x
Reference in New Issue
Block a user