diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b796203aed6b..cc076c8ea42c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,10 @@ 2020-05-05 Jakub Jelinek + PR tree-optimization/94800 + * match.pd (X + (X << C) to X * (1 + (1 << C)), + (X << C1) + (X << C2) to X * ((1 << C1) + (1 << C2))): New + canonicalizations. + PR target/94942 * config/i386/mmx.md (*vec_dupv4hi): Use xYw constraints instead of Yv. diff --git a/gcc/match.pd b/gcc/match.pd index c45e94c0c6a9..d95751796551 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2570,6 +2570,41 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && single_use (@3)) (mult (plusminus @2 { build_one_cst (type); }) @0)))))) +#if GIMPLE +/* Canonicalize X + (X << C) into X * (1 + (1 << C)) and + (X << C1) + (X << C2) into X * ((1 << C1) + (1 << C2)). */ +(simplify + (plus:c @0 (lshift:s @0 INTEGER_CST@1)) + (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && tree_fits_uhwi_p (@1) + && tree_to_uhwi (@1) < element_precision (type)) + (with { tree t = type; + if (!TYPE_OVERFLOW_WRAPS (t)) t = unsigned_type_for (t); + wide_int w = wi::set_bit_in_zero (tree_to_uhwi (@1), + element_precision (type)); + w += 1; + tree cst = wide_int_to_tree (VECTOR_TYPE_P (t) ? TREE_TYPE (t) + : t, w); + cst = build_uniform_cst (t, cst); } + (convert (mult (convert:t @0) { cst; }))))) +(simplify + (plus (lshift:s @0 INTEGER_CST@1) (lshift:s @0 INTEGER_CST@2)) + (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && tree_fits_uhwi_p (@1) + && tree_to_uhwi (@1) < element_precision (type) + && tree_fits_uhwi_p (@2) + && tree_to_uhwi (@2) < element_precision (type)) + (with { tree t = type; + if (!TYPE_OVERFLOW_WRAPS (t)) t = unsigned_type_for (t); + unsigned int prec = element_precision (type); + wide_int w = wi::set_bit_in_zero (tree_to_uhwi (@1), prec); + w += wi::set_bit_in_zero (tree_to_uhwi (@2), prec); + tree cst = wide_int_to_tree (VECTOR_TYPE_P (t) ? TREE_TYPE (t) + : t, w); + cst = build_uniform_cst (t, cst); } + (convert (mult (convert:t @0) { cst; }))))) +#endif + /* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */ (for minmax (min max FMIN_ALL FMAX_ALL) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6475b7d6ff46..2b4874158461 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,11 @@ 2020-05-05 Jakub Jelinek + PR tree-optimization/94800 + * gcc.dg/tree-ssa/pr94800.c: New test. + * gcc.dg/tree-ssa/popcount5.c: New test. + * gcc.dg/tree-ssa/popcount5l.c: New test. + * gcc.dg/tree-ssa/popcount5ll.c: New test. + PR target/94942 * gcc.target/i386/pr94942.c: New test. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount5.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount5.c new file mode 100644 index 000000000000..6358d4f5600f --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount5.c @@ -0,0 +1,22 @@ +/* PR tree-optimization/94800 */ +/* { dg-do compile } */ +/* { dg-require-effective-target popcount } */ +/* { dg-require-effective-target int32plus } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +const unsigned m1 = 0x55555555UL; +const unsigned m2 = 0x33333333UL; +const unsigned m4 = 0x0F0F0F0FUL; +const int shift = 24; + +int popcount64c(unsigned x) +{ + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + x += (x << 8); + x += (x << 16); + return x >> shift; +} + +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount5l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount5l.c new file mode 100644 index 000000000000..b761667099f3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount5l.c @@ -0,0 +1,32 @@ +/* PR tree-optimization/94800 */ +/* { dg-do compile { target int32plus } } */ +/* { dg-require-effective-target popcountl } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +#if __SIZEOF_LONG__ == 4 +const unsigned long m1 = 0x55555555UL; +const unsigned long m2 = 0x33333333UL; +const unsigned long m4 = 0x0F0F0F0FUL; +const int shift = 24; +#else +const unsigned long m1 = 0x5555555555555555UL; +const unsigned long m2 = 0x3333333333333333UL; +const unsigned long m4 = 0x0f0f0f0f0f0f0f0fUL; +const int shift = 56; +#endif + + +int popcount64c(unsigned long x) +{ + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + x += (x << 8); + x += (x << 16); +#if __SIZEOF_LONG__ != 4 + x += (x << 32); +#endif + return x >> shift; +} + +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c new file mode 100644 index 000000000000..831d5e1722ad --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c @@ -0,0 +1,22 @@ +/* PR tree-optimization/94800 */ +/* { dg-do compile } */ +/* { dg-require-effective-target popcountll } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +const unsigned long long m1 = 0x5555555555555555ULL; +const unsigned long long m2 = 0x3333333333333333ULL; +const unsigned long long m4 = 0x0F0F0F0F0F0F0F0FULL; +const int shift = 56; + +int popcount64c(unsigned long long x) +{ + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + x += (x << 8); + x += (x << 16); + x += (x << 32); + return x >> shift; +} + +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94800.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94800.c new file mode 100644 index 000000000000..4f92df3147b8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94800.c @@ -0,0 +1,80 @@ +/* PR tree-optimization/94800 */ +/* { dg-do compile { target { ilp32 || lp64 } } } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times " \* 72340172838076673" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " \* 16843009" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " \* 289360691352306692" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " \* 1229782938247303441" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-not "<<" "optimized" } } */ + +unsigned long long int +foo (unsigned long long int x) +{ + unsigned long long int a = x + (x << 8); + unsigned long long int b = a + (a << 16); + unsigned long long int c = b + (b << 32); + return c; +} + +unsigned int +bar (unsigned int x) +{ + unsigned int a = x + (x << 8); + unsigned int b = a + (a << 16); + return b; +} + +unsigned long long int +baz (unsigned long long int x) +{ + unsigned long long int a = (x << 2) + (x << 10); + unsigned long long int b = a + (a << 16); + unsigned long long int c = b + (b << 32); + return c; +} + +unsigned long long int +qux (unsigned long long int x) +{ + unsigned long long int a = x + (x << 4); + unsigned long long int b = a + (a << 8); + unsigned long long int c = b + (b << 16); + unsigned long long int d = c + (c << 32); + return d; +} + +long long int +quux (long long int x) +{ + long long int a = x + (x << 8); + long long int b = a + (a << 16); + long long int c = b + (b << 32); + return c; +} + +int +corge (int x) +{ + int a = x + (x << 8); + int b = a + (a << 16); + return b; +} + +long long int +garply (long long int x) +{ + long long int a = (x << 2) + (x << 10); + long long int b = a + (a << 16); + long long int c = b + (b << 32); + return c; +} + +long long int +waldo (long long int x) +{ + long long int a = x + (x << 4); + long long int b = a + (a << 8); + long long int c = b + (b << 16); + long long int d = c + (c << 32); + return d; +}