mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-21 15:41:09 +08:00
match.pd: Canonicalize (x + (x << cst)) into (x * cst2) [PR94800]
The popcount* testcases show yet another creative way to write popcount, but rather than adjusting the popcount matcher to deal with it, I think we just should canonicalize those (X + (X << C) to X * (1 + (1 << C)) and (X << C1) + (X << C2) to X * ((1 << C1) + (1 << C2)), because for multiplication we already have simplification rules that can handle nested multiplication (X * CST1 * CST2), while the the shifts and adds we have nothing like that. And user could have written the multiplication anyway, so if we don't emit the fastest or smallest code for the multiplication by constant, we should improve that. At least on the testcases seems the emitted code is reasonable according to cost, except that perhaps we could in some cases try to improve expansion of vector multiplication by uniform constant. 2020-05-05 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/94800 * match.pd (X + (X << C) to X * (1 + (1 << C)), (X << C1) + (X << C2) to X * ((1 << C1) + (1 << C2))): New canonicalizations. * gcc.dg/tree-ssa/pr94800.c: New test. * gcc.dg/tree-ssa/popcount5.c: New test. * gcc.dg/tree-ssa/popcount5l.c: New test. * gcc.dg/tree-ssa/popcount5ll.c: New test.
This commit is contained in:
parent
7f916201ac
commit
144aee70b8
@ -1,5 +1,10 @@
|
||||
2020-05-05 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/94800
|
||||
* match.pd (X + (X << C) to X * (1 + (1 << C)),
|
||||
(X << C1) + (X << C2) to X * ((1 << C1) + (1 << C2))): New
|
||||
canonicalizations.
|
||||
|
||||
PR target/94942
|
||||
* config/i386/mmx.md (*vec_dupv4hi): Use xYw constraints instead of Yv.
|
||||
|
||||
|
35
gcc/match.pd
35
gcc/match.pd
@ -2570,6 +2570,41 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
&& single_use (@3))
|
||||
(mult (plusminus @2 { build_one_cst (type); }) @0))))))
|
||||
|
||||
#if GIMPLE
|
||||
/* Canonicalize X + (X << C) into X * (1 + (1 << C)) and
|
||||
(X << C1) + (X << C2) into X * ((1 << C1) + (1 << C2)). */
|
||||
(simplify
|
||||
(plus:c @0 (lshift:s @0 INTEGER_CST@1))
|
||||
(if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
|
||||
&& tree_fits_uhwi_p (@1)
|
||||
&& tree_to_uhwi (@1) < element_precision (type))
|
||||
(with { tree t = type;
|
||||
if (!TYPE_OVERFLOW_WRAPS (t)) t = unsigned_type_for (t);
|
||||
wide_int w = wi::set_bit_in_zero (tree_to_uhwi (@1),
|
||||
element_precision (type));
|
||||
w += 1;
|
||||
tree cst = wide_int_to_tree (VECTOR_TYPE_P (t) ? TREE_TYPE (t)
|
||||
: t, w);
|
||||
cst = build_uniform_cst (t, cst); }
|
||||
(convert (mult (convert:t @0) { cst; })))))
|
||||
(simplify
|
||||
(plus (lshift:s @0 INTEGER_CST@1) (lshift:s @0 INTEGER_CST@2))
|
||||
(if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
|
||||
&& tree_fits_uhwi_p (@1)
|
||||
&& tree_to_uhwi (@1) < element_precision (type)
|
||||
&& tree_fits_uhwi_p (@2)
|
||||
&& tree_to_uhwi (@2) < element_precision (type))
|
||||
(with { tree t = type;
|
||||
if (!TYPE_OVERFLOW_WRAPS (t)) t = unsigned_type_for (t);
|
||||
unsigned int prec = element_precision (type);
|
||||
wide_int w = wi::set_bit_in_zero (tree_to_uhwi (@1), prec);
|
||||
w += wi::set_bit_in_zero (tree_to_uhwi (@2), prec);
|
||||
tree cst = wide_int_to_tree (VECTOR_TYPE_P (t) ? TREE_TYPE (t)
|
||||
: t, w);
|
||||
cst = build_uniform_cst (t, cst); }
|
||||
(convert (mult (convert:t @0) { cst; })))))
|
||||
#endif
|
||||
|
||||
/* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */
|
||||
|
||||
(for minmax (min max FMIN_ALL FMAX_ALL)
|
||||
|
@ -1,5 +1,11 @@
|
||||
2020-05-05 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/94800
|
||||
* gcc.dg/tree-ssa/pr94800.c: New test.
|
||||
* gcc.dg/tree-ssa/popcount5.c: New test.
|
||||
* gcc.dg/tree-ssa/popcount5l.c: New test.
|
||||
* gcc.dg/tree-ssa/popcount5ll.c: New test.
|
||||
|
||||
PR target/94942
|
||||
* gcc.target/i386/pr94942.c: New test.
|
||||
|
||||
|
22
gcc/testsuite/gcc.dg/tree-ssa/popcount5.c
Normal file
22
gcc/testsuite/gcc.dg/tree-ssa/popcount5.c
Normal file
@ -0,0 +1,22 @@
|
||||
/* PR tree-optimization/94800 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target popcount } */
|
||||
/* { dg-require-effective-target int32plus } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
const unsigned m1 = 0x55555555UL;
|
||||
const unsigned m2 = 0x33333333UL;
|
||||
const unsigned m4 = 0x0F0F0F0FUL;
|
||||
const int shift = 24;
|
||||
|
||||
int popcount64c(unsigned x)
|
||||
{
|
||||
x -= (x >> 1) & m1;
|
||||
x = (x & m2) + ((x >> 2) & m2);
|
||||
x = (x + (x >> 4)) & m4;
|
||||
x += (x << 8);
|
||||
x += (x << 16);
|
||||
return x >> shift;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
|
32
gcc/testsuite/gcc.dg/tree-ssa/popcount5l.c
Normal file
32
gcc/testsuite/gcc.dg/tree-ssa/popcount5l.c
Normal file
@ -0,0 +1,32 @@
|
||||
/* PR tree-optimization/94800 */
|
||||
/* { dg-do compile { target int32plus } } */
|
||||
/* { dg-require-effective-target popcountl } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
#if __SIZEOF_LONG__ == 4
|
||||
const unsigned long m1 = 0x55555555UL;
|
||||
const unsigned long m2 = 0x33333333UL;
|
||||
const unsigned long m4 = 0x0F0F0F0FUL;
|
||||
const int shift = 24;
|
||||
#else
|
||||
const unsigned long m1 = 0x5555555555555555UL;
|
||||
const unsigned long m2 = 0x3333333333333333UL;
|
||||
const unsigned long m4 = 0x0f0f0f0f0f0f0f0fUL;
|
||||
const int shift = 56;
|
||||
#endif
|
||||
|
||||
|
||||
int popcount64c(unsigned long x)
|
||||
{
|
||||
x -= (x >> 1) & m1;
|
||||
x = (x & m2) + ((x >> 2) & m2);
|
||||
x = (x + (x >> 4)) & m4;
|
||||
x += (x << 8);
|
||||
x += (x << 16);
|
||||
#if __SIZEOF_LONG__ != 4
|
||||
x += (x << 32);
|
||||
#endif
|
||||
return x >> shift;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
|
22
gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c
Normal file
22
gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c
Normal file
@ -0,0 +1,22 @@
|
||||
/* PR tree-optimization/94800 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target popcountll } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
const unsigned long long m1 = 0x5555555555555555ULL;
|
||||
const unsigned long long m2 = 0x3333333333333333ULL;
|
||||
const unsigned long long m4 = 0x0F0F0F0F0F0F0F0FULL;
|
||||
const int shift = 56;
|
||||
|
||||
int popcount64c(unsigned long long x)
|
||||
{
|
||||
x -= (x >> 1) & m1;
|
||||
x = (x & m2) + ((x >> 2) & m2);
|
||||
x = (x + (x >> 4)) & m4;
|
||||
x += (x << 8);
|
||||
x += (x << 16);
|
||||
x += (x << 32);
|
||||
return x >> shift;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
|
80
gcc/testsuite/gcc.dg/tree-ssa/pr94800.c
Normal file
80
gcc/testsuite/gcc.dg/tree-ssa/pr94800.c
Normal file
@ -0,0 +1,80 @@
|
||||
/* PR tree-optimization/94800 */
|
||||
/* { dg-do compile { target { ilp32 || lp64 } } } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
/* { dg-final { scan-tree-dump-times " \* 72340172838076673" 2 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times " \* 16843009" 2 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times " \* 289360691352306692" 2 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times " \* 1229782938247303441" 2 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-not "<<" "optimized" } } */
|
||||
|
||||
unsigned long long int
|
||||
foo (unsigned long long int x)
|
||||
{
|
||||
unsigned long long int a = x + (x << 8);
|
||||
unsigned long long int b = a + (a << 16);
|
||||
unsigned long long int c = b + (b << 32);
|
||||
return c;
|
||||
}
|
||||
|
||||
unsigned int
|
||||
bar (unsigned int x)
|
||||
{
|
||||
unsigned int a = x + (x << 8);
|
||||
unsigned int b = a + (a << 16);
|
||||
return b;
|
||||
}
|
||||
|
||||
unsigned long long int
|
||||
baz (unsigned long long int x)
|
||||
{
|
||||
unsigned long long int a = (x << 2) + (x << 10);
|
||||
unsigned long long int b = a + (a << 16);
|
||||
unsigned long long int c = b + (b << 32);
|
||||
return c;
|
||||
}
|
||||
|
||||
unsigned long long int
|
||||
qux (unsigned long long int x)
|
||||
{
|
||||
unsigned long long int a = x + (x << 4);
|
||||
unsigned long long int b = a + (a << 8);
|
||||
unsigned long long int c = b + (b << 16);
|
||||
unsigned long long int d = c + (c << 32);
|
||||
return d;
|
||||
}
|
||||
|
||||
long long int
|
||||
quux (long long int x)
|
||||
{
|
||||
long long int a = x + (x << 8);
|
||||
long long int b = a + (a << 16);
|
||||
long long int c = b + (b << 32);
|
||||
return c;
|
||||
}
|
||||
|
||||
int
|
||||
corge (int x)
|
||||
{
|
||||
int a = x + (x << 8);
|
||||
int b = a + (a << 16);
|
||||
return b;
|
||||
}
|
||||
|
||||
long long int
|
||||
garply (long long int x)
|
||||
{
|
||||
long long int a = (x << 2) + (x << 10);
|
||||
long long int b = a + (a << 16);
|
||||
long long int c = b + (b << 32);
|
||||
return c;
|
||||
}
|
||||
|
||||
long long int
|
||||
waldo (long long int x)
|
||||
{
|
||||
long long int a = x + (x << 4);
|
||||
long long int b = a + (a << 8);
|
||||
long long int c = b + (b << 16);
|
||||
long long int d = c + (c << 32);
|
||||
return d;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user