mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 05:00:26 +08:00
re PR target/70329 (wrong code with -mavx512bw and 64byte vectors)
PR target/70329 * config/i386/i386.c (ix86_expand_vecop_qihi): Don't bother computing d.perm[i] for i >= d.nelt. If not full_interleave, compute d.perm[i] in a way that works also for AVX512BW. * gcc.target/i386/avx512bw-pr70329-1.c: New test. * gcc.target/i386/avx512bw-pr70329-2.c: New test. From-SVN: r234394
This commit is contained in:
parent
4e3e2cf17d
commit
b01915ed16
@ -1,5 +1,10 @@
|
||||
2016-03-22 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/70329
|
||||
* config/i386/i386.c (ix86_expand_vecop_qihi): Don't bother computing
|
||||
d.perm[i] for i >= d.nelt. If not full_interleave, compute d.perm[i]
|
||||
in a way that works also for AVX512BW.
|
||||
|
||||
PR target/70300
|
||||
* config/i386/i386.md (cvtsd2ss splitter): Unpack in destination
|
||||
instead of source if operands[1] is xmm16 and above and
|
||||
|
@ -51910,16 +51910,24 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
|
||||
{
|
||||
/* For SSE2, we used a full interleave, so the desired
|
||||
results are in the even elements. */
|
||||
for (i = 0; i < 64; ++i)
|
||||
for (i = 0; i < d.nelt; ++i)
|
||||
d.perm[i] = i * 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* For AVX, the interleave used above was not cross-lane. So the
|
||||
extraction is evens but with the second and third quarter swapped.
|
||||
Happily, that is even one insn shorter than even extraction. */
|
||||
for (i = 0; i < 64; ++i)
|
||||
d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
|
||||
Happily, that is even one insn shorter than even extraction.
|
||||
For AVX512BW we have 4 lanes. We extract evens from within a lane,
|
||||
always first from the first and then from the second source operand,
|
||||
the index bits above the low 4 bits remain the same.
|
||||
Thus, for d.nelt == 32 we want permutation
|
||||
0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
|
||||
and for d.nelt == 64 we want permutation
|
||||
0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
|
||||
32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */
|
||||
for (i = 0; i < d.nelt; ++i)
|
||||
d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
|
||||
}
|
||||
|
||||
ok = ix86_expand_vec_perm_const_1 (&d);
|
||||
|
@ -1,5 +1,9 @@
|
||||
2016-03-22 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/70329
|
||||
* gcc.target/i386/avx512bw-pr70329-1.c: New test.
|
||||
* gcc.target/i386/avx512bw-pr70329-2.c: New test.
|
||||
|
||||
PR target/70300
|
||||
* gcc.target/i386/pr70300.c: New test.
|
||||
|
||||
|
27
gcc/testsuite/gcc.target/i386/avx512bw-pr70329-1.c
Normal file
27
gcc/testsuite/gcc.target/i386/avx512bw-pr70329-1.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* PR target/70329 */
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O0 -mavx512bw" } */
|
||||
/* { dg-require-effective-target avx512bw } */
|
||||
|
||||
#define AVX512BW
|
||||
#include "avx512f-helper.h"
|
||||
|
||||
typedef unsigned char A __attribute__ ((vector_size (64)));
|
||||
typedef unsigned int B __attribute__ ((vector_size (64)));
|
||||
|
||||
unsigned __attribute__ ((noinline, noclone))
|
||||
foo (A a, A b, B c)
|
||||
{
|
||||
a *= b;
|
||||
c[1] += a[8];
|
||||
return c[1];
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
A a = (A) { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
unsigned x = foo (a, a, (B) { 1, 2 });
|
||||
if (x != 83)
|
||||
abort ();
|
||||
}
|
33
gcc/testsuite/gcc.target/i386/avx512bw-pr70329-2.c
Normal file
33
gcc/testsuite/gcc.target/i386/avx512bw-pr70329-2.c
Normal file
@ -0,0 +1,33 @@
|
||||
/* PR target/70329 */
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -mavx512bw" } */
|
||||
/* { dg-require-effective-target avx512bw } */
|
||||
|
||||
#define AVX512BW
|
||||
#include "avx512f-helper.h"
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
foo (unsigned char *src1, unsigned char *src2, unsigned char *dst)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 64; i++)
|
||||
dst[i] = (unsigned char) ((int) src1[i] * (int) src2[i]);
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
unsigned char a[64], b[64], c[64];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 64; i++)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = (i + 1);
|
||||
}
|
||||
foo (a, b, c);
|
||||
for (i = 0; i < 64; i++)
|
||||
if (c[i] != (unsigned char) (i * (i + 1)))
|
||||
abort ();
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user