mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-19 05:10:25 +08:00
Fold (add -1; zero_ext; add +1) operations to zero_ext when there is no overflow (PR37451, PR61837)
This "subtract/extend/add" sequence has existed for a long time and is still annoying us (PR37451, part of PR61837) when converting from 32 bits to 64 bits, as the ctr register is used as 64 bits on powerpc64. Andrew Pinski had a patch, but it caused some issues and was reverted by Joseph S. Myers (PR37451, PR37782). Andrew: http://gcc.gnu.org/ml/gcc-patches/2008-09/msg01070.html http://gcc.gnu.org/ml/gcc-patches/2008-10/msg01321.html Joseph: https://gcc.gnu.org/legacy-ml/gcc-patches/2011-11/msg02405.html We can still simplify "subtract/zero_ext/add" to "zero_ext" when the number of loop iterations is known to be less than MODE_MAX (i.e. only simplify when counter+0x1 does NOT overflow). Bootstrapped and regression tested on Power8-LE. gcc/ChangeLog 2020-05-15 Xiong Hu Luo <luoxhu@linux.ibm.com> PR rtl-optimization/37451, part of PR target/61837 * loop-doloop.c (doloop_simplify_count): New function. Simplify (add -1; zero_ext; add +1) to zero_ext when not wrapping. (doloop_modify): Call doloop_simplify_count. gcc/testsuite/ChangeLog 2020-05-15 Xiong Hu Luo <luoxhu@linux.ibm.com> PR rtl-optimization/37451, part of PR target/61837 * gcc.target/powerpc/doloop-2.c: New test.
This commit is contained in:
parent
98aad12cd2
commit
8a15faa730
@ -1,3 +1,10 @@
|
||||
2020-05-14 Xiong Hu Luo <luoxhu@linux.ibm.com>
|
||||
|
||||
PR rtl-optimization/37451, part of PR target/61837
|
||||
* loop-doloop.c (doloop_simplify_count): New function. Simplify
|
||||
(add -1; zero_ext; add +1) to zero_ext when not wrapping.
|
||||
(doloop_modify): Call doloop_simplify_count.
|
||||
|
||||
2020-05-14 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR jit/94778
|
||||
|
@ -397,6 +397,42 @@ add_test (rtx cond, edge *e, basic_block dest)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Fold (add -1; zero_ext; add +1) operations to zero_ext if not wrapping. i.e:
|
||||
|
||||
73: r145:SI=r123:DI#0-0x1
|
||||
74: r144:DI=zero_extend (r145:SI)
|
||||
75: r143:DI=r144:DI+0x1
|
||||
...
|
||||
31: r135:CC=cmp (r123:DI,0)
|
||||
72: {pc={(r143:DI!=0x1)?L70:pc};r143:DI=r143:DI-0x1;...}
|
||||
|
||||
r123:DI#0-0x1 is param count derived from loop->niter_expr equal to number of
|
||||
loop iterations, if loop iterations expression doesn't overflow, then
|
||||
(zero_extend (r123:DI#0-1))+1 can be simplified to zero_extend. */
|
||||
|
||||
static rtx
|
||||
doloop_simplify_count (class loop *loop, scalar_int_mode mode, rtx count)
|
||||
{
|
||||
widest_int iterations;
|
||||
if (GET_CODE (count) == ZERO_EXTEND)
|
||||
{
|
||||
rtx extop0 = XEXP (count, 0);
|
||||
if (GET_CODE (extop0) == PLUS)
|
||||
{
|
||||
rtx addop0 = XEXP (extop0, 0);
|
||||
rtx addop1 = XEXP (extop0, 1);
|
||||
|
||||
if (get_max_loop_iterations (loop, &iterations)
|
||||
&& wi::ltu_p (iterations, GET_MODE_MASK (GET_MODE (addop0)))
|
||||
&& addop1 == constm1_rtx)
|
||||
return simplify_gen_unary (ZERO_EXTEND, mode, addop0,
|
||||
GET_MODE (addop0));
|
||||
}
|
||||
}
|
||||
|
||||
return simplify_gen_binary (PLUS, mode, count, const1_rtx);
|
||||
}
|
||||
|
||||
/* Modify the loop to use the low-overhead looping insn where LOOP
|
||||
describes the loop, DESC describes the number of iterations of the
|
||||
loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
|
||||
@ -477,7 +513,7 @@ doloop_modify (class loop *loop, class niter_desc *desc,
|
||||
}
|
||||
|
||||
if (increment_count)
|
||||
count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
|
||||
count = doloop_simplify_count (loop, mode, count);
|
||||
|
||||
/* Insert initialization of the count register into the loop header. */
|
||||
start_sequence ();
|
||||
|
@ -1,3 +1,8 @@
|
||||
2020-05-14 Xiong Hu Luo <luoxhu@linux.ibm.com>
|
||||
|
||||
PR rtl-optimization/37451, part of PR target/61837
|
||||
* gcc.target/powerpc/doloop-2.c: New test.
|
||||
|
||||
2020-05-14 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR jit/94778
|
||||
|
29
gcc/testsuite/gcc.target/powerpc/doloop-2.c
Normal file
29
gcc/testsuite/gcc.target/powerpc/doloop-2.c
Normal file
@ -0,0 +1,29 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fno-unroll-loops" } */
|
||||
|
||||
/* Unsigned-counter case: store i into a[i] for every i in [0, l) and return
   l unchanged.  The loop is deliberately kept in this simple counted form
   because the dg-final directives in this file scan the assembly generated
   for it.  */
unsigned int
|
||||
foo1 (unsigned int l, int *a)
|
||||
{
|
||||
unsigned int i;
|
||||
for(i = 0;i < l; i++)
|
||||
a[i] = i;
|
||||
return l;
|
||||
}
|
||||
|
||||
/* Signed-counter case: same loop as foo1 but with an int bound and index.
   Stores i into a[i] for every i in [0, l) and returns l unchanged; nothing
   is stored when l is not positive.  */
int
|
||||
foo2 (int l, int *a)
|
||||
{
|
||||
int i;
|
||||
for(i = 0;i < l; i++)
|
||||
a[i] = i;
|
||||
return l;
|
||||
}
|
||||
|
||||
/* The place where we were getting an extra -1 is when converting from 32bits
|
||||
to 64bits as the ctr register is used as 64bits on powerpc64. We should be
|
||||
able to do this loop without "add -1/zero_ext/add 1" to the l to get the
|
||||
number of iterations of this loop while still using a do-loop. */
|
||||
|
||||
/* { dg-final { scan-assembler-not {(?n)\maddi .*,.*,-1$} } } */
|
||||
/* { dg-final { scan-assembler-times "bdnz" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "mtctr" 2 } } */
|
Loading…
x
Reference in New Issue
Block a user