mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-02-24 23:45:59 +08:00
Simplify (B * v + C) * D -> BD* v + CD when B,C,D are all INTEGER_CST.
Similar for (v + B) * C + D -> C * v + BCD. Don't simplify it when there's overflow and overflow is UB for type v. gcc/ChangeLog: PR tree-optimization/53533 * match.pd: Simplify (B * v + C) * D -> BD * v + CD and (v + B) * C + D -> C * v + BCD when B,C,D are all INTEGER_CST, and there's no overflow or !TYPE_OVERFLOW_UNDEFINED. gcc/testsuite/ChangeLog: * gcc.target/i386/pr53533-1.c: New test. * gcc.target/i386/pr53533-2.c: New test. * gcc.target/i386/pr53533-3.c: New test. * gcc.target/i386/pr53533-4.c: New test. * gcc.target/i386/pr53533-5.c: New test. * gcc.dg/vect/slp-11a.c: Adjust testcase.
This commit is contained in:
parent
499b9c5f09
commit
1089d08311
81
gcc/match.pd
81
gcc/match.pd
@ -489,6 +489,87 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
(if (!overflow || TYPE_OVERFLOW_WRAPS (type))
|
||||
(mult @0 { wide_int_to_tree (type, mul); }))))
|
||||
|
||||
/* Similar to above, but there could be an extra add/sub between
|
||||
successive multuiplications. */
|
||||
(simplify
|
||||
(mult (plus:s (mult:s@4 @0 INTEGER_CST@1) INTEGER_CST@2) INTEGER_CST@3)
|
||||
(with {
|
||||
bool overflowed = true;
|
||||
wi::overflow_type ovf1, ovf2;
|
||||
wide_int mul = wi::mul (wi::to_wide (@1), wi::to_wide (@3),
|
||||
TYPE_SIGN (type), &ovf1);
|
||||
wide_int add = wi::mul (wi::to_wide (@2), wi::to_wide (@3),
|
||||
TYPE_SIGN (type), &ovf2);
|
||||
if (TYPE_OVERFLOW_UNDEFINED (type))
|
||||
{
|
||||
#if GIMPLE
|
||||
value_range vr0;
|
||||
if (ovf1 == wi::OVF_NONE && ovf2 == wi::OVF_NONE
|
||||
&& get_global_range_query ()->range_of_expr (vr0, @4)
|
||||
&& vr0.kind () == VR_RANGE)
|
||||
{
|
||||
wide_int wmin0 = vr0.lower_bound ();
|
||||
wide_int wmax0 = vr0.upper_bound ();
|
||||
wmin0 = wi::mul (wmin0, wi::to_wide (@3), TYPE_SIGN (type), &ovf1);
|
||||
wmax0 = wi::mul (wmax0, wi::to_wide (@3), TYPE_SIGN (type), &ovf2);
|
||||
if (ovf1 == wi::OVF_NONE && ovf2 == wi::OVF_NONE)
|
||||
{
|
||||
wi::add (wmin0, add, TYPE_SIGN (type), &ovf1);
|
||||
wi::add (wmax0, add, TYPE_SIGN (type), &ovf2);
|
||||
if (ovf1 == wi::OVF_NONE && ovf2 == wi::OVF_NONE)
|
||||
overflowed = false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
overflowed = false;
|
||||
}
|
||||
/* Skip folding on overflow. */
|
||||
(if (!overflowed)
|
||||
(plus (mult @0 { wide_int_to_tree (type, mul); })
|
||||
{ wide_int_to_tree (type, add); }))))
|
||||
|
||||
/* Similar to above, but a multiplication between successive additions. */
|
||||
(simplify
|
||||
(plus (mult:s (plus:s @0 INTEGER_CST@1) INTEGER_CST@2) INTEGER_CST@3)
|
||||
(with {
|
||||
bool overflowed = true;
|
||||
wi::overflow_type ovf1;
|
||||
wi::overflow_type ovf2;
|
||||
wide_int mul = wi::mul (wi::to_wide (@1), wi::to_wide (@2),
|
||||
TYPE_SIGN (type), &ovf1);
|
||||
wide_int add = wi::add (mul, wi::to_wide (@3),
|
||||
TYPE_SIGN (type), &ovf2);
|
||||
if (TYPE_OVERFLOW_UNDEFINED (type))
|
||||
{
|
||||
#if GIMPLE
|
||||
value_range vr0;
|
||||
if (ovf1 == wi::OVF_NONE && ovf2 == wi::OVF_NONE
|
||||
&& get_global_range_query ()->range_of_expr (vr0, @0)
|
||||
&& vr0.kind () == VR_RANGE)
|
||||
{
|
||||
wide_int wmin0 = vr0.lower_bound ();
|
||||
wide_int wmax0 = vr0.upper_bound ();
|
||||
wmin0 = wi::mul (wmin0, wi::to_wide (@2), TYPE_SIGN (type), &ovf1);
|
||||
wmax0 = wi::mul (wmax0, wi::to_wide (@2), TYPE_SIGN (type), &ovf2);
|
||||
if (ovf1 == wi::OVF_NONE && ovf2 == wi::OVF_NONE)
|
||||
{
|
||||
wi::add (wmin0, mul, TYPE_SIGN (type), &ovf1);
|
||||
wi::add (wmax0, mul, TYPE_SIGN (type), &ovf2);
|
||||
if (ovf1 == wi::OVF_NONE && ovf2 == wi::OVF_NONE)
|
||||
overflowed = false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
overflowed = false;
|
||||
}
|
||||
/* Skip folding on overflow. */
|
||||
(if (!overflowed)
|
||||
(plus (mult @0 @2) { wide_int_to_tree (type, add); }))))
|
||||
|
||||
/* Optimize A / A to 1.0 if we don't care about
|
||||
NaNs or Infinities. */
|
||||
(simplify
|
||||
|
@ -9,14 +9,14 @@ int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
|
||||
unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
|
||||
int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
|
||||
/* Different operations - not SLPable. */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a0 = in[i*8] + 5;
|
||||
a1 = in[i*8 + 1] * 6;
|
||||
a1 = in[i*8 + 1] * 51072;
|
||||
a2 = in[i*8 + 2] + 7;
|
||||
a3 = in[i*8 + 3] + 8;
|
||||
a4 = in[i*8 + 4] + 9;
|
||||
@ -25,7 +25,7 @@ main1 ()
|
||||
a7 = in[i*8 + 7] + 12;
|
||||
|
||||
b0 = a0 * 3;
|
||||
b1 = a1 * 2;
|
||||
b1 = a1 * 51072;
|
||||
b2 = a2 * 12;
|
||||
b3 = a3 * 5;
|
||||
b4 = a4 * 8;
|
||||
@ -47,7 +47,7 @@ main1 ()
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (out[i*8] != (in[i*8] + 5) * 3 - 2
|
||||
|| out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3
|
||||
|| out[i*8 + 1] != (in[i*8 + 1] * 51072) * 51072 - 3
|
||||
|| out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2
|
||||
|| out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1
|
||||
|| out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8
|
||||
|
23
gcc/testsuite/gcc.target/i386/pr53533-1.c
Normal file
23
gcc/testsuite/gcc.target/i386/pr53533-1.c
Normal file
@ -0,0 +1,23 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1" } */
|
||||
/* { dg-final { scan-assembler-times "imull\[ \t\]" "1" } } */
|
||||
/* { dg-final { scan-assembler-times "(?:addl|subl)\[ \t\]" "1" { target { ! ia32 } } } } */
|
||||
|
||||
void
|
||||
__attribute__((noipa))
|
||||
foo (unsigned a[256], unsigned b[256])
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 256; ++i)
|
||||
{
|
||||
unsigned tmp = a[i] + 12345U;
|
||||
tmp *= 914237U;
|
||||
tmp += 12332U;
|
||||
tmp *= 914237U;
|
||||
tmp += 12332U;
|
||||
tmp *= 914237U;
|
||||
tmp -= 13U;
|
||||
tmp *= 8000U;
|
||||
b[i] = tmp;
|
||||
}
|
||||
}
|
46
gcc/testsuite/gcc.target/i386/pr53533-2.c
Normal file
46
gcc/testsuite/gcc.target/i386/pr53533-2.c
Normal file
@ -0,0 +1,46 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
#include "pr53533-1.c"
|
||||
|
||||
void
|
||||
__attribute__((optimize("-O0")))
|
||||
foo1 (unsigned a[256], unsigned b[256])
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 256; ++i)
|
||||
{
|
||||
unsigned tmp = a[i] + 12345U;
|
||||
tmp *= 914237U;
|
||||
tmp += 12332U;
|
||||
tmp *= 914237U;
|
||||
tmp += 12332U;
|
||||
tmp *= 914237U;
|
||||
tmp -= 13U;
|
||||
tmp *= 8000U;
|
||||
b[i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
unsigned int a[256];
|
||||
unsigned int b[256];
|
||||
unsigned int c[256];
|
||||
for (unsigned int i = 0; i != 256; i++)
|
||||
{
|
||||
b[i] = 0;
|
||||
c[i] = 1;
|
||||
a[i] = i * i - 10 * i + 33;
|
||||
}
|
||||
foo (a, b);
|
||||
foo1 (a, c);
|
||||
|
||||
for (unsigned int i = 0; i != 256; i++)
|
||||
{
|
||||
if (b[i] != c[i])
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
24
gcc/testsuite/gcc.target/i386/pr53533-3.c
Normal file
24
gcc/testsuite/gcc.target/i386/pr53533-3.c
Normal file
@ -0,0 +1,24 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -fwrapv" } */
|
||||
/* { dg-final { scan-assembler-times "imull\[ \t\]" "1" } } */
|
||||
/* { dg-final { scan-assembler-times "(?:addl|subl)\[ \t\]" "1" { target { ! ia32 } } } } */
|
||||
|
||||
void
|
||||
__attribute__((noipa))
|
||||
foo (int a[256], int b[256])
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 256; ++i)
|
||||
{
|
||||
int tmp = a[i] + 12345;
|
||||
tmp *= 914237;
|
||||
tmp += 12332;
|
||||
tmp *= 914237;
|
||||
tmp += 12332;
|
||||
tmp *= 914237;
|
||||
tmp -= 13;
|
||||
tmp *= 8000;
|
||||
b[i] = tmp;
|
||||
}
|
||||
}
|
||||
|
46
gcc/testsuite/gcc.target/i386/pr53533-4.c
Normal file
46
gcc/testsuite/gcc.target/i386/pr53533-4.c
Normal file
@ -0,0 +1,46 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fwrapv" } */
|
||||
|
||||
#include "pr53533-3.c"
|
||||
|
||||
void
|
||||
__attribute__((optimize("-O0")))
|
||||
foo1 (int a[256], int b[256])
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 256; ++i)
|
||||
{
|
||||
int tmp = a[i] + 12345;
|
||||
tmp *= 914237;
|
||||
tmp += 12332;
|
||||
tmp *= 914237;
|
||||
tmp += 12332;
|
||||
tmp *= 914237;
|
||||
tmp -= 13;
|
||||
tmp *= 8000;
|
||||
b[i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int a[256];
|
||||
int b[256];
|
||||
int c[256];
|
||||
for (int i = 0; i != 256; i++)
|
||||
{
|
||||
b[i] = 0;
|
||||
c[i] = 1;
|
||||
a[i] = i * i - 10 * i + 33;
|
||||
}
|
||||
foo (a, b);
|
||||
foo1 (a, c);
|
||||
|
||||
for (unsigned int i = 0; i != 256; i++)
|
||||
{
|
||||
if (b[i] != c[i])
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
22
gcc/testsuite/gcc.target/i386/pr53533-5.c
Normal file
22
gcc/testsuite/gcc.target/i386/pr53533-5.c
Normal file
@ -0,0 +1,22 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -fdump-tree-optimized" } */
|
||||
/* { dg-final { scan-tree-dump-times {(?n)\* 2147483647} 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times {(?n)\* 1073741823} 1 "optimized" } } */
|
||||
|
||||
#define INT_MAX 2147483647
|
||||
int
|
||||
foo (int a)
|
||||
{
|
||||
/* When a == -2, there's no overflow for (a + 1) * INT_MAX - 1.
|
||||
but overflow for a * INT_MAX + (INT_MAX - 1).
|
||||
Don't simpify it. */
|
||||
return (a + 1) * INT_MAX - 1;
|
||||
}
|
||||
|
||||
int
|
||||
foo1 (int a)
|
||||
{
|
||||
/* Be conservative here, don't simplify this as long as
|
||||
a * 2147483646 may overflow. */
|
||||
return 1073741823 * (a * 2 + 1);
|
||||
}
|
Loading…
Reference in New Issue
Block a user