2
0
mirror of git://gcc.gnu.org/git/gcc.git synced 2025-04-16 14:21:14 +08:00

re PR tree-optimization/92712 (Performance regression with assumed values)

PR tree-optimization/92712
	* match.pd ((A * B) +- A -> (B +- 1) * A,
	A +- (A * B) -> (1 +- B) * A): Allow optimizing signed integers
	even when we don't know anything about range of A, but do know
	something about range of B and the simplification won't introduce
	new UB.

	* gcc.dg/tree-ssa/pr92712-1.c: New test.
	* gcc.dg/tree-ssa/pr92712-2.c: New test.
	* gcc.dg/tree-ssa/pr92712-3.c: New test.
	* gfortran.dg/loop_versioning_1.f90: Adjust expected number of
	"likely to be the innermost dimension" messages.
	* gfortran.dg/loop_versioning_10.f90: Likewise.
	* gfortran.dg/loop_versioning_6.f90: Likewise.

From-SVN: r278894
This commit is contained in:
Jakub Jelinek 2019-12-02 09:51:49 +01:00
parent 9b14fc3326
commit a213ab3856
9 changed files with 179 additions and 10 deletions

@ -1,4 +1,13 @@
2019-12-02 Feng Xue <fxue@os.amperecomputing.com>
2019-12-02 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/92712
* match.pd ((A * B) +- A -> (B +- 1) * A,
A +- (A * B) -> (1 +- B) * A): Allow optimizing signed integers
even when we don't know anything about range of A, but do know
something about range of B and the simplification won't introduce
new UB.
2019-12-02 Feng Xue <fxue@os.amperecomputing.com>
PR ipa/92133
* doc/invoke.texi (ipa-cp-max-recursive-depth): Document new option.
@ -4543,7 +4552,7 @@
(lto_free_file_name_hash): New function.
* lto-streamer.h (lto_free_file_name_hash): New.
2019-11-07 Feng Xue <fxue@os.amperecomputing.com>
2019-11-07 Feng Xue <fxue@os.amperecomputing.com>
PR tree-optimization/89134
* doc/invoke.texi (min-loop-cond-split-prob): Document new --params.

@ -2480,18 +2480,42 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(plusminus @0 (mult:c@3 @0 @2))
(if ((!ANY_INTEGRAL_TYPE_P (type)
|| TYPE_OVERFLOW_WRAPS (type)
/* For @0 + @0*@2 this transformation would introduce UB
(where there was none before) for @0 in [-1,0] and @2 max.
For @0 - @0*@2 this transformation would introduce UB
for @0 0 and @2 in [min,min+1] or @0 -1 and @2 min+1. */
|| (INTEGRAL_TYPE_P (type)
&& tree_expr_nonzero_p (@0)
&& expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type)))))
&& ((tree_expr_nonzero_p (@0)
&& expr_not_equal_to (@0,
wi::minus_one (TYPE_PRECISION (type))))
|| (plusminus == PLUS_EXPR
? expr_not_equal_to (@2,
wi::max_value (TYPE_PRECISION (type), SIGNED))
/* Let's ignore the @0 -1 and @2 min case. */
: (expr_not_equal_to (@2,
wi::min_value (TYPE_PRECISION (type), SIGNED))
&& expr_not_equal_to (@2,
wi::min_value (TYPE_PRECISION (type), SIGNED)
+ 1))))))
&& single_use (@3))
(mult (plusminus { build_one_cst (type); } @2) @0)))
(simplify
(plusminus (mult:c@3 @0 @2) @0)
(if ((!ANY_INTEGRAL_TYPE_P (type)
|| TYPE_OVERFLOW_WRAPS (type)
/* For @0*@2 + @0 this transformation would introduce UB
(where there was none before) for @0 in [-1,0] and @2 max.
For @0*@2 - @0 this transformation would introduce UB
for @0 0 and @2 min. */
|| (INTEGRAL_TYPE_P (type)
&& tree_expr_nonzero_p (@0)
&& expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type)))))
&& ((tree_expr_nonzero_p (@0)
&& (plusminus == MINUS_EXPR
|| expr_not_equal_to (@0,
wi::minus_one (TYPE_PRECISION (type)))))
|| expr_not_equal_to (@2,
(plusminus == PLUS_EXPR
? wi::max_value (TYPE_PRECISION (type), SIGNED)
: wi::min_value (TYPE_PRECISION (type), SIGNED))))))
&& single_use (@3))
(mult (plusminus @2 { build_one_cst (type); }) @0))))))

@ -1,3 +1,14 @@
2019-12-02 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/92712
* gcc.dg/tree-ssa/pr92712-1.c: New test.
* gcc.dg/tree-ssa/pr92712-2.c: New test.
* gcc.dg/tree-ssa/pr92712-3.c: New test.
* gfortran.dg/loop_versioning_1.f90: Adjust expected number of
"likely to be the innermost dimension" messages.
* gfortran.dg/loop_versioning_10.f90: Likewise.
* gfortran.dg/loop_versioning_6.f90: Likewise.
2019-12-02 Feng Xue <fxue@os.amperecomputing.com>
PR ipa/92133

@ -0,0 +1,21 @@
/* PR tree-optimization/92712 */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump " = \[tv]_\[0-9]*\\\(D\\\) \\* \[tv]_\[0-9]*\\\(D\\\);" "optimized" } } */
/* Return V added to itself T times using a plain counted loop; the
   result is 0 when T <= 0 because the loop body never runs.  The
   scan-tree-dump directive of this test expects the optimized dump to
   contain a single multiplication of the two parameters instead.  */
static int
foo (int t, int v)
{
  int x = 0;
  /* The loop declares its own counter, so no function-scope `i' is
     needed (the original declared one that was shadowed and unused).  */
  for (int i = 0; i < t; ++i)
    x += v;
  return x;
}
/* Externally visible wrapper around foo.  The T < 0 path is marked
   unreachable, so the compiler may assume T is non-negative before the
   call to foo.  */
int
bar (int t, int v)
{
if (t < 0)
__builtin_unreachable ();
return foo (t, v);
}

@ -0,0 +1,66 @@
/* PR tree-optimization/92712 */
/* { dg-do compile } */
/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times " = \[tv]_\[0-9]*\\\(D\\\) \\* \[tv]_\[0-9]*\\\(D\\\);" 7 "optimized" } } */
/* Positive case: (t - 1) * v + v, product on the left of the addition.
   The decrement is done in signed int, so A cannot be INT_MAX without
   T - 1 having overflowed; presumably this is the range fact about the
   multiplier that lets the sum be folded.  The scan-tree-dump-times
   directive of this test counts the resulting parameter * parameter
   multiplications.  */
int
f1 (int t, int v)
{
int a = t - 1;
int b = a * v;
return b + v;
}
/* Positive case: v + (t - 1) * v, i.e. the same computation as f1 with
   the addition operands swapped.  The decrement is signed, so A cannot
   be INT_MAX without prior overflow.  */
int
f2 (int t, int v)
{
int a = t - 1;
int b = a * v;
return v + b;
}
/* Positive case: (t + 1) * v - v, product on the left of the
   subtraction.  The increment is signed, so A cannot be INT_MIN
   without T + 1 having overflowed.  */
int
f3 (int t, int v)
{
int a = t + 1;
int b = a * v;
return b - v;
}
/* Positive case: v - (1 - t) * v, product on the right of the
   subtraction.  1 - T is signed, so without overflow A is at least
   INT_MIN + 2, which excludes the INT_MIN and INT_MIN + 1 multiplier
   values.  */
int
f4 (int t, int v)
{
int a = 1 - t;
int b = a * v;
return v - b;
}
/* Positive case driven by the range of V rather than of A: V is
   declared never 0 and never -1 via the unreachable guard.  T - 1U is
   evaluated in unsigned arithmetic (it wraps instead of overflowing)
   and converted back to int, so nothing is known about A itself.
   Computes a * v + v.  */
int
f5 (int t, int v)
{
if (v == 0 || v == -1)
__builtin_unreachable ();
int a = t - 1U;
int b = a * v;
return b + v;
}
/* Same as f5 with the addition operands swapped: v + a * v, where V is
   declared never 0 and never -1 and A comes from a wrapping unsigned
   decrement of T.  */
int
f6 (int t, int v)
{
if (v == 0 || v == -1)
__builtin_unreachable ();
int a = t - 1U;
int b = a * v;
return v + b;
}
/* Positive case for subtraction driven by the range of V: only V == 0
   is excluded via the unreachable guard.  A comes from a wrapping
   unsigned increment of T, so nothing is known about A.  Computes
   a * v - v.  */
int
f7 (int t, int v)
{
if (v == 0)
__builtin_unreachable ();
int a = t + 1U;
int b = a * v;
return b - v;
}

@ -0,0 +1,36 @@
/* PR tree-optimization/92712 */
/* { dg-do compile } */
/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-not " = \[tv]_\[0-9]*\\\(D\\\) \\* \[tv]_\[0-9]*\\\(D\\\);" "optimized" } } */
/* Negative case: a * v + v where A comes from T - 1U, evaluated in
   unsigned arithmetic (wrapping) and converted back to int, and V is
   unconstrained.  No range is known for either operand; the
   scan-tree-dump-not directive of this test requires that no
   parameter * parameter multiplication appears in the optimized dump.  */
int
f1 (int t, int v)
{
int a = t - 1U;
int b = a * v;
return b + v;
}
/* Negative case: v + a * v, the same as f1 in this file with the
   addition operands swapped.  A comes from a wrapping unsigned
   decrement of T and V is unconstrained.  */
int
f2 (int t, int v)
{
int a = t - 1U;
int b = a * v;
return v + b;
}
/* Negative case: a * v - v where A comes from T + 1U, evaluated in
   unsigned arithmetic (wrapping), and V is unconstrained.  */
int
f3 (int t, int v)
{
int a = t + 1U;
int b = a * v;
return b - v;
}
/* Negative case: v - a * v where A comes from 1U - T, evaluated in
   unsigned arithmetic (wrapping), and V is unconstrained.  */
int
f4 (int t, int v)
{
int a = 1U - t;
int b = a * v;
return v - b;
}

@ -23,6 +23,6 @@ subroutine f3(x, limit, step)
end do
end subroutine f3
! { dg-final { scan-tree-dump-times {likely to be the innermost dimension} 2 "lversion" } }
! { dg-final { scan-tree-dump-times {likely to be the innermost dimension} 1 "lversion" } }
! { dg-final { scan-tree-dump-times {want to version containing loop} 3 "lversion" } }
! { dg-final { scan-tree-dump-times {versioned this loop} 3 "lversion" } }

@ -26,6 +26,6 @@ subroutine f4(x, i)
end do
end subroutine f4
! { dg-final { scan-tree-dump-times {likely to be the innermost dimension} 6 "lversion" } }
! { dg-final { scan-tree-dump-times {likely to be the innermost dimension} 4 "lversion" } }
! { dg-final { scan-tree-dump-times {want to version} 4 "lversion" } }
! { dg-final { scan-tree-dump-times {versioned} 4 "lversion" } }

@ -89,5 +89,7 @@ subroutine f9(x, limit, step)
end do
end subroutine f9
! { dg-final { scan-tree-dump-times {want to version containing loop} 9 "lversion" } }
! { dg-final { scan-tree-dump-times {versioned this loop} 9 "lversion" } }
! { dg-final { scan-tree-dump-times {want to version containing loop} 9 "lversion" { target lp64 } } }
! { dg-final { scan-tree-dump-times {versioned this loop} 9 "lversion" { target lp64 } } }
! { dg-final { scan-tree-dump-times {want to version containing loop} 8 "lversion" { target { ! lp64 } } } }
! { dg-final { scan-tree-dump-times {versioned this loop} 8 "lversion" { target { ! lp64 } } } }