mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-26 07:40:26 +08:00
PR90838: Support ctz idioms
Support common idioms for count trailing zeroes using an array lookup. The canonical form is array[((x & -x) * C) >> SHIFT] where C is a magic constant which when multiplied by a power of 2 creates a unique value in the top 5 or 6 bits. This is then indexed into a table which maps it to the number of trailing zeroes. When the table is valid, we emit a sequence using the target defined value for ctz (0): int ctz1 (unsigned x) { static const char table[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 }; return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; } Is optimized to: rbit w0, w0 clz w0, w0 and w0, w0, 31 ret gcc/ PR tree-optimization/90838 * tree-ssa-forwprop.c (check_ctz_array): Add new function. (check_ctz_string): Likewise. (optimize_count_trailing_zeroes): Likewise. (simplify_count_trailing_zeroes): Likewise. (pass_forwprop::execute): Try ctz simplification. * match.pd: Add matching for ctz idioms. testsuite/ PR tree-optimization/90838 * testsuite/gcc.target/aarch64/pr90838.c: New test. From-SVN: r280132
This commit is contained in:
parent
9869896730
commit
b937050d30
gcc
@ -1,3 +1,13 @@
|
||||
2020-01-10 Wilco Dijkstra <wdijkstr@arm.com>
|
||||
|
||||
PR tree-optimization/90838
|
||||
* tree-ssa-forwprop.c (check_ctz_array): Add new function.
|
||||
(check_ctz_string): Likewise.
|
||||
(optimize_count_trailing_zeroes): Likewise.
|
||||
(simplify_count_trailing_zeroes): Likewise.
|
||||
(pass_forwprop::execute): Try ctz simplification.
|
||||
* match.pd: Add matching for ctz idioms.
|
||||
|
||||
2020-01-10 Stam Markianos-Wright <stam.markianos-wright@arm.com>
|
||||
|
||||
* config/aarch64/aarch64.c (aarch64_invalid_conversion): New function
|
||||
|
@ -6152,3 +6152,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
(simplify
|
||||
(vec_perm vec_same_elem_p@0 @0 @1)
|
||||
@0)
|
||||
|
||||
/* Match count trailing zeroes for simplify_count_trailing_zeroes in fwprop.
|
||||
The canonical form is array[((x & -x) * C) >> SHIFT] where C is a magic
|
||||
constant which when multiplied by a power of 2 contains a unique value
|
||||
in the top 5 or 6 bits. This is then indexed into a table which maps it
|
||||
to the number of trailing zeroes. */
|
||||
(match (ctz_table_index @1 @2 @3)
|
||||
(rshift (mult (bit_and:c (negate @1) @1) INTEGER_CST@2) INTEGER_CST@3))
|
||||
|
@ -1,3 +1,7 @@
|
||||
2020-01-10 Wilco Dijkstra <wdijkstr@arm.com>
|
||||
|
||||
* testsuite/gcc.target/aarch64/pr90838.c: New test.
|
||||
|
||||
2020-01-10 Stam Markianos-Wright <stam.markianos-wright@arm.com>
|
||||
|
||||
* g++.target/aarch64/bfloat_cpp_typecheck.C: New test.
|
||||
|
64
gcc/testsuite/gcc.target/aarch64/pr90838.c
Normal file
64
gcc/testsuite/gcc.target/aarch64/pr90838.c
Normal file
@ -0,0 +1,64 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
int ctz1 (unsigned x)
|
||||
{
|
||||
static const char table[32] =
|
||||
{
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
|
||||
};
|
||||
|
||||
return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
|
||||
}
|
||||
|
||||
int ctz2 (unsigned x)
|
||||
{
|
||||
const int u = 0;
|
||||
static short table[64] =
|
||||
{
|
||||
32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
|
||||
10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
|
||||
31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
|
||||
30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
|
||||
};
|
||||
|
||||
x = (x & -x) * 0x0450FBAF;
|
||||
return table[x >> 26];
|
||||
}
|
||||
|
||||
int ctz3 (unsigned x)
|
||||
{
|
||||
static int table[32] =
|
||||
{
|
||||
0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
|
||||
31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
|
||||
};
|
||||
|
||||
if (x == 0) return 32;
|
||||
x = (x & -x) * 0x04D7651F;
|
||||
return table[x >> 27];
|
||||
}
|
||||
|
||||
static const unsigned long long magic = 0x03f08c5392f756cdULL;
|
||||
|
||||
static const char table[64] = {
|
||||
0, 1, 12, 2, 13, 22, 17, 3,
|
||||
14, 33, 23, 36, 18, 58, 28, 4,
|
||||
62, 15, 34, 26, 24, 48, 50, 37,
|
||||
19, 55, 59, 52, 29, 44, 39, 5,
|
||||
63, 11, 21, 16, 32, 35, 57, 27,
|
||||
61, 25, 47, 49, 54, 51, 43, 38,
|
||||
10, 20, 31, 56, 60, 46, 53, 42,
|
||||
9, 30, 45, 41, 8, 40, 7, 6,
|
||||
};
|
||||
|
||||
int ctz4 (unsigned long x)
|
||||
{
|
||||
unsigned long lsb = x & -x;
|
||||
return table[(lsb * magic) >> 58];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "clz\t" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "and\t" 2 } } */
|
||||
/* { dg-final { scan-assembler-not "cmp\t.*0" } } */
|
@ -48,6 +48,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "optabs-tree.h"
|
||||
#include "tree-vector-builder.h"
|
||||
#include "vec-perm-indices.h"
|
||||
#include "internal-fn.h"
|
||||
#include "cgraph.h"
|
||||
|
||||
/* This pass propagates the RHS of assignment statements into use
|
||||
sites of the LHS of the assignment. It's basically a specialized
|
||||
@ -1778,6 +1780,188 @@ simplify_rotate (gimple_stmt_iterator *gsi)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Check whether an array contains a valid ctz table. */
|
||||
static bool
|
||||
check_ctz_array (tree ctor, unsigned HOST_WIDE_INT mulc,
|
||||
HOST_WIDE_INT &zero_val, unsigned shift, unsigned bits)
|
||||
{
|
||||
tree elt, idx;
|
||||
unsigned HOST_WIDE_INT i, mask;
|
||||
unsigned matched = 0;
|
||||
|
||||
mask = ((HOST_WIDE_INT_1U << (bits - shift)) - 1) << shift;
|
||||
|
||||
zero_val = 0;
|
||||
|
||||
FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), i, idx, elt)
|
||||
{
|
||||
if (TREE_CODE (idx) != INTEGER_CST || TREE_CODE (elt) != INTEGER_CST)
|
||||
return false;
|
||||
if (i > bits * 2)
|
||||
return false;
|
||||
|
||||
unsigned HOST_WIDE_INT index = tree_to_shwi (idx);
|
||||
HOST_WIDE_INT val = tree_to_shwi (elt);
|
||||
|
||||
if (index == 0)
|
||||
{
|
||||
zero_val = val;
|
||||
matched++;
|
||||
}
|
||||
|
||||
if (val >= 0 && val < bits && (((mulc << val) & mask) >> shift) == index)
|
||||
matched++;
|
||||
|
||||
if (matched > bits)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Check whether a string contains a valid ctz table. */
|
||||
static bool
|
||||
check_ctz_string (tree string, unsigned HOST_WIDE_INT mulc,
|
||||
HOST_WIDE_INT &zero_val, unsigned shift, unsigned bits)
|
||||
{
|
||||
unsigned HOST_WIDE_INT len = TREE_STRING_LENGTH (string);
|
||||
unsigned HOST_WIDE_INT mask;
|
||||
unsigned matched = 0;
|
||||
const unsigned char *p = (const unsigned char *) TREE_STRING_POINTER (string);
|
||||
|
||||
if (len < bits || len > bits * 2)
|
||||
return false;
|
||||
|
||||
mask = ((HOST_WIDE_INT_1U << (bits - shift)) - 1) << shift;
|
||||
|
||||
zero_val = p[0];
|
||||
|
||||
for (unsigned i = 0; i < len; i++)
|
||||
if (p[i] < bits && (((mulc << p[i]) & mask) >> shift) == i)
|
||||
matched++;
|
||||
|
||||
return matched == bits;
|
||||
}
|
||||
|
||||
/* Recognize count trailing zeroes idiom.
|
||||
The canonical form is array[((x & -x) * C) >> SHIFT] where C is a magic
|
||||
constant which when multiplied by a power of 2 creates a unique value
|
||||
in the top 5 or 6 bits. This is then indexed into a table which maps it
|
||||
to the number of trailing zeroes. Array[0] is returned so the caller can
|
||||
emit an appropriate sequence depending on whether ctz (0) is defined on
|
||||
the target. */
|
||||
static bool
|
||||
optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
|
||||
tree tshift, HOST_WIDE_INT &zero_val)
|
||||
{
|
||||
tree type = TREE_TYPE (array_ref);
|
||||
tree array = TREE_OPERAND (array_ref, 0);
|
||||
|
||||
gcc_assert (TREE_CODE (mulc) == INTEGER_CST);
|
||||
gcc_assert (TREE_CODE (tshift) == INTEGER_CST);
|
||||
|
||||
tree input_type = TREE_TYPE (x);
|
||||
unsigned input_bits = tree_to_shwi (TYPE_SIZE (input_type));
|
||||
|
||||
/* Check the array is not wider than integer type and the input is a 32-bit
|
||||
or 64-bit type. */
|
||||
if (TYPE_PRECISION (type) > 32)
|
||||
return false;
|
||||
if (input_bits != 32 && input_bits != 64)
|
||||
return false;
|
||||
|
||||
if (!direct_internal_fn_supported_p (IFN_CTZ, input_type, OPTIMIZE_FOR_BOTH))
|
||||
return false;
|
||||
|
||||
/* Check the lower bound of the array is zero. */
|
||||
tree low = array_ref_low_bound (array_ref);
|
||||
if (!low || !integer_zerop (low))
|
||||
return false;
|
||||
|
||||
unsigned shiftval = tree_to_uhwi (tshift);
|
||||
|
||||
/* Check the shift extracts the top 5..7 bits. */
|
||||
if (shiftval < input_bits - 7 || shiftval > input_bits - 5)
|
||||
return false;
|
||||
|
||||
tree ctor = ctor_for_folding (array);
|
||||
if (!ctor)
|
||||
return false;
|
||||
|
||||
unsigned HOST_WIDE_INT val = tree_to_uhwi (mulc);
|
||||
|
||||
if (TREE_CODE (ctor) == CONSTRUCTOR)
|
||||
return check_ctz_array (ctor, val, zero_val, shiftval, input_bits);
|
||||
|
||||
if (TREE_CODE (ctor) == STRING_CST)
|
||||
return check_ctz_string (ctor, val, zero_val, shiftval, input_bits);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Match.pd function to match the ctz expression. */
|
||||
extern bool gimple_ctz_table_index (tree, tree *, tree (*)(tree));
|
||||
|
||||
static bool
|
||||
simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
|
||||
{
|
||||
gimple *stmt = gsi_stmt (*gsi);
|
||||
tree array_ref = gimple_assign_rhs1 (stmt);
|
||||
tree res_ops[3];
|
||||
HOST_WIDE_INT zero_val;
|
||||
|
||||
gcc_checking_assert (TREE_CODE (array_ref) == ARRAY_REF);
|
||||
|
||||
if (!gimple_ctz_table_index (TREE_OPERAND (array_ref, 1), &res_ops[0], NULL))
|
||||
return false;
|
||||
|
||||
if (optimize_count_trailing_zeroes (array_ref, res_ops[0],
|
||||
res_ops[1], res_ops[2], zero_val))
|
||||
{
|
||||
tree type = TREE_TYPE (res_ops[0]);
|
||||
HOST_WIDE_INT ctzval;
|
||||
HOST_WIDE_INT type_size = tree_to_shwi (TYPE_SIZE (type));
|
||||
bool zero_ok = CTZ_DEFINED_VALUE_AT_ZERO (TYPE_MODE (type), ctzval) == 2;
|
||||
|
||||
/* Skip if there is no value defined at zero, or if we can't easily
|
||||
return the correct value for zero. */
|
||||
if (!zero_ok)
|
||||
return false;
|
||||
if (zero_val != ctzval && !(zero_val == 0 && ctzval == type_size))
|
||||
return false;
|
||||
|
||||
gimple_seq seq = NULL;
|
||||
gimple *g;
|
||||
gcall *call = gimple_build_call_internal (IFN_CTZ, 1, res_ops[0]);
|
||||
gimple_set_location (call, gimple_location (stmt));
|
||||
gimple_set_lhs (call, make_ssa_name (integer_type_node));
|
||||
gimple_seq_add_stmt (&seq, call);
|
||||
|
||||
tree prev_lhs = gimple_call_lhs (call);
|
||||
|
||||
/* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0. */
|
||||
if (zero_val == 0 && ctzval == type_size)
|
||||
{
|
||||
g = gimple_build_assign (make_ssa_name (integer_type_node),
|
||||
BIT_AND_EXPR, prev_lhs,
|
||||
build_int_cst (integer_type_node,
|
||||
type_size - 1));
|
||||
gimple_set_location (g, gimple_location (stmt));
|
||||
gimple_seq_add_stmt (&seq, g);
|
||||
prev_lhs = gimple_assign_lhs (g);
|
||||
}
|
||||
|
||||
g = gimple_build_assign (gimple_assign_lhs (stmt), NOP_EXPR, prev_lhs);
|
||||
gimple_seq_add_stmt (&seq, g);
|
||||
gsi_replace_with_seq (gsi, seq, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/* Combine an element access with a shuffle. Returns true if there were
|
||||
any changes made, else it returns false. */
|
||||
|
||||
@ -2874,6 +3058,8 @@ pass_forwprop::execute (function *fun)
|
||||
else if (code == CONSTRUCTOR
|
||||
&& TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE)
|
||||
changed = simplify_vector_constructor (&gsi);
|
||||
else if (code == ARRAY_REF)
|
||||
changed = simplify_count_trailing_zeroes (&gsi);
|
||||
break;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user