mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-14 05:20:25 +08:00
re PR tree-optimization/53366 (wrong code generation by tree vectorizer using AVX)
PR tree-optimization/53366
* tree-vect-slp.c (vect_supported_load_permutation_p): Don't shortcut
tests if complex_numbers == 2, but there are non-complex number loads
too.
* gcc.dg/torture/pr53366-1.c: New test.
* gcc.dg/torture/pr53366-2.c: New test.
* gcc.target/i386/pr53366-1.c: New test.
* gcc.target/i386/pr53366-2.c: New test.

From-SVN: r187717
This commit is contained in:
parent
017a202055
commit
7772bae06c
@ -1,5 +1,10 @@
|
||||
2012-05-21 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/53366
|
||||
* tree-vect-slp.c (vect_supported_load_permutation_p): Don't shortcut
|
||||
tests if complex_numbers == 2, but there are non-complex number loads
|
||||
too.
|
||||
|
||||
PR tree-optimization/53409
|
||||
* tree-vect-loop.c (vect_analyze_loop_operations): Don't check
|
||||
vinfo_for_stmt (op_def_stmt) if op_def_stmt isn't inside loop.
|
||||
|
@ -1,5 +1,11 @@
|
||||
2012-05-21 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/53366
|
||||
* gcc.dg/torture/pr53366-1.c: New test.
|
||||
* gcc.dg/torture/pr53366-2.c: New test.
|
||||
* gcc.target/i386/pr53366-1.c: New test.
|
||||
* gcc.target/i386/pr53366-2.c: New test.
|
||||
|
||||
PR tree-optimization/53409
|
||||
* gcc.c-torture/compile/pr53409.c: New test.
|
||||
|
||||
|
70
gcc/testsuite/gcc.dg/torture/pr53366-1.c
Normal file
70
gcc/testsuite/gcc.dg/torture/pr53366-1.c
Normal file
@ -0,0 +1,70 @@
|
||||
/* PR tree-optimization/53366 */
|
||||
/* { dg-do run } */
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
struct S { double v[3]; };
|
||||
struct T { struct S r, i; };
|
||||
struct U { struct T j[5]; };
|
||||
|
||||
void
|
||||
foo (struct U *__restrict p1, struct U *__restrict p2,
|
||||
struct S l1, struct S l2, struct S l3, struct S l4,
|
||||
const double _Complex * __restrict x, int y, int z)
|
||||
{
|
||||
int i, j;
|
||||
while (y < z - 2)
|
||||
{
|
||||
for (j = 0; j < 5; ++j)
|
||||
{
|
||||
double a = __real__ x[5 * y + j];
|
||||
double b = __imag__ x[5 * y + j];
|
||||
double c = __real__ x[5 * (y + 2) + j];
|
||||
double d = __imag__ x[5 * (y + 2) + j];
|
||||
double e = __real__ x[5 * (y + 1) + j];
|
||||
double f = __imag__ x[5 * (y + 1) + j];
|
||||
double g = __real__ x[5 * (y + 3) + j];
|
||||
double h = __imag__ x[5 * (y + 3) + j];
|
||||
for (i = 0; i < 3; ++i)
|
||||
{
|
||||
p1->j[j].r.v[i] += l2.v[i] * a;
|
||||
p1->j[j].r.v[i] += l4.v[i] * c;
|
||||
p1->j[j].i.v[i] += l2.v[i] * b;
|
||||
p1->j[j].i.v[i] += l4.v[i] * d;
|
||||
p2->j[j].r.v[i] += l3.v[i] * e;
|
||||
p2->j[j].r.v[i] += l1.v[i] * g;
|
||||
p2->j[j].i.v[i] += l3.v[i] * f;
|
||||
p2->j[j].i.v[i] += l1.v[i] * h;
|
||||
}
|
||||
}
|
||||
y += 4;
|
||||
}
|
||||
}
|
||||
|
||||
_Complex double x[5005];
|
||||
struct U p1, p2;
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i, j;
|
||||
struct S l1, l2, l3, l4;
|
||||
for (i = 0; i < 5005; ++i)
|
||||
x[i] = i + 1.0iF * (2 * i);
|
||||
for (i = 0; i < 3; ++i)
|
||||
{
|
||||
l1.v[i] = 1;
|
||||
l2.v[i] = 2;
|
||||
l3.v[i] = 3;
|
||||
l4.v[i] = 4;
|
||||
}
|
||||
foo (&p1, &p2, l1, l2, l3, l4, x, 5, 1000);
|
||||
for (j = 0; j < 5; ++j)
|
||||
for (i = 0; i < 3; ++i)
|
||||
if (p1.j[j].r.v[i] != 3752430 + j * 1494.0
|
||||
|| p1.j[j].i.v[i] != p1.j[j].r.v[i] * 2
|
||||
|| p2.j[j].r.v[i] != 2502450 + j * 996.0
|
||||
|| p2.j[j].i.v[i] != p2.j[j].r.v[i] * 2)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
43
gcc/testsuite/gcc.dg/torture/pr53366-2.c
Normal file
43
gcc/testsuite/gcc.dg/torture/pr53366-2.c
Normal file
@ -0,0 +1,43 @@
|
||||
/* PR tree-optimization/53366 */
|
||||
/* { dg-do run } */
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
struct T { float r[3], i[3]; };
|
||||
struct U { struct T j[2]; };
|
||||
|
||||
void __attribute__ ((noinline))
|
||||
foo (struct U *__restrict y, const float _Complex *__restrict x)
|
||||
{
|
||||
int i, j;
|
||||
for (j = 0; j < 2; ++j)
|
||||
{
|
||||
float a = __real__ x[j];
|
||||
float b = __imag__ x[j];
|
||||
float c = __real__ x[j + 2];
|
||||
float d = __imag__ x[j + 2];
|
||||
for (i = 0; i < 3; ++i)
|
||||
{
|
||||
y->j[j].r[i] = y->j[j].r[i] + a + c;
|
||||
y->j[j].i[i] = y->j[j].i[i] + b + d;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_Complex float x[4];
|
||||
struct U y;
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i, j;
|
||||
for (i = 0; i < 4; ++i)
|
||||
x[i] = i + 1.0iF * (2 * i);
|
||||
foo (&y, x);
|
||||
for (j = 0; j < 2; ++j)
|
||||
for (i = 0; i < 3; ++i)
|
||||
if (y.j[j].r[i] != __real__ (x[j] + x[j + 2])
|
||||
|| y.j[j].i[i] != __imag__ (x[j] + x[j + 2]))
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
5
gcc/testsuite/gcc.target/i386/pr53366-1.c
Normal file
5
gcc/testsuite/gcc.target/i386/pr53366-1.c
Normal file
@ -0,0 +1,5 @@
|
||||
/* PR tree-optimization/53366 */
|
||||
/* { dg-do run { target avx_runtime } } */
|
||||
/* { dg-options "-O3 -mavx" } */
|
||||
|
||||
#include "../../gcc.dg/torture/pr53366-1.c"
|
5
gcc/testsuite/gcc.target/i386/pr53366-2.c
Normal file
5
gcc/testsuite/gcc.target/i386/pr53366-2.c
Normal file
@ -0,0 +1,5 @@
|
||||
/* PR tree-optimization/53366 */
|
||||
/* { dg-do run { target avx_runtime } } */
|
||||
/* { dg-options "-O3 -mavx" } */
|
||||
|
||||
#include "../../gcc.dg/torture/pr53366-2.c"
|
@ -1199,7 +1199,8 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
|
||||
|
||||
/* We checked that this case ok, so there is no need to proceed with
|
||||
permutation tests. */
|
||||
if (complex_numbers == 2)
|
||||
if (complex_numbers == 2
|
||||
&& VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
|
||||
{
|
||||
VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
|
||||
VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
|
||||
|
Loading…
x
Reference in New Issue
Block a user