Mirror of git://gcc.gnu.org/git/gcc.git
re PR middle-end/82853 (Optimize x % 3 == 0 without modulo)
	PR middle-end/82853
	* expr.h (maybe_optimize_mod_cmp): Declare.
	* expr.c (mod_inv): New function.
	(maybe_optimize_mod_cmp): New function.
	(do_store_flag): Use it.
	* cfgexpand.c (expand_gimple_cond): Likewise.
	* gcc.target/i386/pr82853-1.c: New test.
	* gcc.target/i386/pr82853-2.c: New test.

From-SVN: r264248
commit c0cbe5260f (parent 8e36332cf6)
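Not part of the commit, but useful context for the diff below: the optimization replaces the division behind a remainder-and-compare with a single multiply and an unsigned comparison. A minimal standalone C sketch of the unsigned odd-divisor case (the constants match the x % 3U example quoted in the new expr.c comment; the helper name is illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only, not GCC code: for a 32-bit unsigned x,
   x % 3 == 0  is equivalent to  x * 0xAAAAAAABu <= 0x55555555u,
   where 0xAAAAAAAB is the multiplicative inverse of 3 modulo 2^32
   and 0x55555555 is (2^32 - 1) / 3.  */
static int divisible_by_3 (uint32_t x)
{
  return x * 0xAAAAAAABu <= 0x55555555u;
}

int main (void)
{
  for (uint32_t x = 0; x < 3000000; x++)
    if (divisible_by_3 (x) != (x % 3 == 0))
      {
        printf ("mismatch at %u\n", x);
        return 1;
      }
  puts ("ok");
  return 0;
}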
gcc/ChangeLog
@@ -1,3 +1,12 @@
2018-09-12  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/82853
	* expr.h (maybe_optimize_mod_cmp): Declare.
	* expr.c (mod_inv): New function.
	(maybe_optimize_mod_cmp): New function.
	(do_store_flag): Use it.
	* cfgexpand.c (expand_gimple_cond): Likewise.

2018-09-09  Cesar Philippidis  <cesar@codesourcery.com>
	    Julian Brown  <julian@codesourcery.com>
gcc/cfgexpand.c
@@ -2477,6 +2477,13 @@ expand_gimple_cond (basic_block bb, gcond *stmt)
	}
    }

  /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial
     into (x - C2) * C3 < C4.  */
  if ((code == EQ_EXPR || code == NE_EXPR)
      && TREE_CODE (op0) == SSA_NAME
      && TREE_CODE (op1) == INTEGER_CST)
    code = maybe_optimize_mod_cmp (code, &op0, &op1);

  last2 = last = get_last_insn ();

  extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
gcc/expr.c (+257 lines)
@@ -11491,6 +11491,243 @@ string_constant (tree arg, tree *ptr_offset, tree *mem_size, tree *nonstr)
  return init;
}

/* Compute the modular multiplicative inverse of A modulo M
   using extended Euclid's algorithm.  Assumes A and M are coprime.  */
static wide_int
mod_inv (const wide_int &a, const wide_int &b)
{
  /* Verify the assumption.  */
  gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));

  unsigned int p = a.get_precision () + 1;
  gcc_checking_assert (b.get_precision () + 1 == p);
  wide_int c = wide_int::from (a, p, UNSIGNED);
  wide_int d = wide_int::from (b, p, UNSIGNED);
  wide_int x0 = wide_int::from (0, p, UNSIGNED);
  wide_int x1 = wide_int::from (1, p, UNSIGNED);

  if (wi::eq_p (b, 1))
    return wide_int::from (1, p, UNSIGNED);

  while (wi::gt_p (c, 1, UNSIGNED))
    {
      wide_int t = d;
      wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
      c = t;
      wide_int s = x0;
      x0 = wi::sub (x1, wi::mul (q, x0));
      x1 = s;
    }
  if (wi::lt_p (x1, 0, SIGNED))
    x1 += d;
  return x1;
}
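As a side note (not part of the patch): for the power-of-two modulus used here, the same inverse can also be obtained by Newton-Hensel lifting, which is an easy way to cross-check mod_inv on the host. A small hypothetical sketch in plain C, using a different technique than the extended Euclid above:

#include <assert.h>
#include <stdint.h>

/* Hypothetical helper, not part of the patch: multiplicative inverse of
   an odd c modulo 2^32 by Newton-Hensel lifting.  inv = c is already
   correct modulo 8; each step doubles the number of correct low bits
   (3 -> 6 -> 12 -> 24 -> 48), so four steps cover 32 bits.  */
static uint32_t
inverse_mod_2_32 (uint32_t c)
{
  uint32_t inv = c;
  for (int i = 0; i < 4; i++)
    inv *= 2u - c * inv;
  return inv;
}

int main (void)
{
  /* The inverse of 3 is 0xAAAAAAAB, the constant quoted in the
     maybe_optimize_mod_cmp comment below.  */
  assert (inverse_mod_2_32 (3) == 0xAAAAAAABu);
  assert ((uint32_t) (3u * inverse_mod_2_32 (3)) == 1u);
  return 0;
}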

/* Attempt to optimize unsigned (X % C1) == C2 (or (X % C1) != C2).
   If C1 is odd to:
   (X - C2) * C3 <= C4 (or >), where
   C3 is modular multiplicative inverse of C1 and 1<<prec and
   C4 is ((1<<prec) - 1) / C1 or ((1<<prec) - 1) / C1 - 1 (the latter
   if C2 > ((1<<prec) - 1) % C1).
   If C1 is even, S = ctz (C1) and C2 is 0, use
   ((X * C3) r>> S) <= C4, where C3 is modular multiplicative
   inverse of C1>>S and 1<<prec and C4 is (((1<<prec) - 1) / (C1>>S)) >> S.

   For signed (X % C1) == 0 if C1 is odd to (all operations in it
   unsigned):
   (X * C3) + C4 <= 2 * C4, where
   C3 is modular multiplicative inverse of (unsigned) C1 and 1<<prec and
   C4 is ((1<<(prec - 1) - 1) / C1).
   If C1 is even, S = ctz(C1), use
   ((X * C3) + C4) r>> S <= (C4 >> (S - 1))
   where C3 is modular multiplicative inverse of (unsigned)(C1>>S) and 1<<prec
   and C4 is ((1<<(prec - 1) - 1) / (C1>>S)) & (-1<<S).

   See the Hacker's Delight book, section 10-17.  */
enum tree_code
maybe_optimize_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
{
  gcc_checking_assert (code == EQ_EXPR || code == NE_EXPR);
  gcc_checking_assert (TREE_CODE (*arg1) == INTEGER_CST);

  if (optimize < 2)
    return code;

  gimple *stmt = get_def_for_expr (*arg0, TRUNC_MOD_EXPR);
  if (stmt == NULL)
    return code;

  tree treeop0 = gimple_assign_rhs1 (stmt);
  tree treeop1 = gimple_assign_rhs2 (stmt);
  if (TREE_CODE (treeop0) != SSA_NAME
      || TREE_CODE (treeop1) != INTEGER_CST
      /* x % pow2 is handled right already.  */
      || integer_pow2p (treeop1)
      /* Don't optimize the undefined behavior case x % 0;
	 x % 1 should have been optimized into zero, punt if
	 it makes it here for whatever reason;
	 x % -c should have been optimized into x % c.  */
      || compare_tree_int (treeop1, 2) <= 0
      /* Likewise x % c == d where d >= c should be always false.  */
      || tree_int_cst_le (treeop1, *arg1))
    return code;

  tree type = TREE_TYPE (*arg0);
  scalar_int_mode mode;
  if (!is_a <scalar_int_mode> (TYPE_MODE (type), &mode))
    return code;
  if (GET_MODE_BITSIZE (mode) != TYPE_PRECISION (type)
      || TYPE_PRECISION (type) <= 1)
    return code;

  signop sgn = UNSIGNED;
  /* If both operands are known to have the sign bit clear, handle
     even the signed modulo case as unsigned.  treeop1 is always
     positive >= 2, checked above.  */
  if (!TYPE_UNSIGNED (type) && get_range_pos_neg (treeop0) != 1)
    sgn = SIGNED;

  if (!TYPE_UNSIGNED (type))
    {
      if (tree_int_cst_sgn (*arg1) == -1)
	return code;
      type = unsigned_type_for (type);
      if (!type || TYPE_MODE (type) != TYPE_MODE (TREE_TYPE (*arg0)))
	return code;
    }

  int prec = TYPE_PRECISION (type);
  wide_int w = wi::to_wide (treeop1);
  int shift = wi::ctz (w);
  /* Unsigned (X % C1) == C2 is equivalent to (X - C2) % C1 == 0 if
     C2 <= -1U % C1, because for any Z >= 0U - C2 in that case (Z % C1) != 0.
     If C1 is odd, we can handle all cases by subtracting
     C4 below.  We could handle even the even C1 and C2 > -1U % C1 cases
     e.g. by testing for overflow on the subtraction, punt on that for now
     though.  */
  if ((sgn == SIGNED || shift) && !integer_zerop (*arg1))
    {
      if (sgn == SIGNED)
	return code;
      wide_int x = wi::umod_trunc (wi::mask (prec, false, prec), w);
      if (wi::gtu_p (wi::to_wide (*arg1), x))
	return code;
    }

  imm_use_iterator imm_iter;
  use_operand_p use_p;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, treeop0)
    {
      gimple *use_stmt = USE_STMT (use_p);
      /* Punt if treeop0 is used in the same bb in a division
	 or another modulo with the same divisor.  We should expect
	 the division and modulo combined together.  */
      if (use_stmt == stmt
	  || gimple_bb (use_stmt) != gimple_bb (stmt))
	continue;
      if (!is_gimple_assign (use_stmt)
	  || (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR
	      && gimple_assign_rhs_code (use_stmt) != TRUNC_MOD_EXPR))
	continue;
      if (gimple_assign_rhs1 (use_stmt) != treeop0
	  || !operand_equal_p (gimple_assign_rhs2 (use_stmt), treeop1, 0))
	continue;
      return code;
    }

  w = wi::lrshift (w, shift);
  wide_int a = wide_int::from (w, prec + 1, UNSIGNED);
  wide_int b = wi::shifted_mask (prec, 1, false, prec + 1);
  wide_int m = wide_int::from (mod_inv (a, b), prec, UNSIGNED);
  tree c3 = wide_int_to_tree (type, m);
  tree c5 = NULL_TREE;
  wide_int d, e;
  if (sgn == UNSIGNED)
    {
      d = wi::divmod_trunc (wi::mask (prec, false, prec), w, UNSIGNED, &e);
      /* Use <= floor ((1<<prec) - 1) / C1 only if C2 <= ((1<<prec) - 1) % C1,
	 otherwise use < or subtract one from C4.  E.g. for
	 x % 3U == 0 we transform this into x * 0xaaaaaaab <= 0x55555555, but
	 x % 3U == 1 already needs to be
	 (x - 1) * 0xaaaaaaabU <= 0x55555554.  */
      if (!shift && wi::gtu_p (wi::to_wide (*arg1), e))
	d -= 1;
      if (shift)
	d = wi::lrshift (d, shift);
    }
  else
    {
      e = wi::udiv_trunc (wi::mask (prec - 1, false, prec), w);
      if (!shift)
	d = wi::lshift (e, 1);
      else
	{
	  e = wi::bit_and (e, wi::mask (shift, true, prec));
	  d = wi::lrshift (e, shift - 1);
	}
      c5 = wide_int_to_tree (type, e);
    }
  tree c4 = wide_int_to_tree (type, d);

  rtx op0 = expand_normal (treeop0);
  treeop0 = make_tree (TREE_TYPE (treeop0), op0);

  bool speed_p = optimize_insn_for_speed_p ();

  do_pending_stack_adjust ();

  location_t loc = gimple_location (stmt);
  struct separate_ops ops;
  ops.code = TRUNC_MOD_EXPR;
  ops.location = loc;
  ops.type = TREE_TYPE (treeop0);
  ops.op0 = treeop0;
  ops.op1 = treeop1;
  ops.op2 = NULL_TREE;
  start_sequence ();
  rtx mor = expand_expr_real_2 (&ops, NULL_RTX, TYPE_MODE (ops.type),
				EXPAND_NORMAL);
  rtx_insn *moinsns = get_insns ();
  end_sequence ();

  unsigned mocost = seq_cost (moinsns, speed_p);
  mocost += rtx_cost (mor, mode, EQ, 0, speed_p);
  mocost += rtx_cost (expand_normal (*arg1), mode, EQ, 1, speed_p);

  tree t = fold_convert_loc (loc, type, treeop0);
  if (!integer_zerop (*arg1))
    t = fold_build2_loc (loc, MINUS_EXPR, type, t, fold_convert (type, *arg1));
  t = fold_build2_loc (loc, MULT_EXPR, type, t, c3);
  if (sgn == SIGNED)
    t = fold_build2_loc (loc, PLUS_EXPR, type, t, c5);
  if (shift)
    {
      tree s = build_int_cst (NULL_TREE, shift);
      t = fold_build2_loc (loc, RROTATE_EXPR, type, t, s);
    }

  start_sequence ();
  rtx mur = expand_normal (t);
  rtx_insn *muinsns = get_insns ();
  end_sequence ();

  unsigned mucost = seq_cost (muinsns, speed_p);
  mucost += rtx_cost (mur, mode, LE, 0, speed_p);
  mucost += rtx_cost (expand_normal (c4), mode, LE, 1, speed_p);

  if (mocost <= mucost)
    {
      emit_insn (moinsns);
      *arg0 = make_tree (TREE_TYPE (*arg0), mor);
      return code;
    }

  emit_insn (muinsns);
  *arg0 = make_tree (type, mur);
  *arg1 = c4;
  return code == EQ_EXPR ? LE_EXPR : GT_EXPR;
}
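To make the even-divisor branch above concrete (again a standalone sketch, not GCC code): for unsigned x % 6 == 0, shift = ctz (6) = 1, C3 is the inverse of 3 modulo 2^32 and C4 is (0xFFFFFFFF / 3) >> 1, giving the multiply-and-rotate test described in the function comment. The constants below are illustrative, derived from those formulas:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only, not GCC code: the unsigned even-divisor form for
   x % 6 == 0 becomes a multiply, a rotate right by 1 and an unsigned
   compare against 0x2AAAAAAA.  */
static uint32_t
rotr1 (uint32_t v)
{
  return (v >> 1) | (v << 31);
}

static int
divisible_by_6 (uint32_t x)
{
  return rotr1 (x * 0xAAAAAAABu) <= 0x2AAAAAAAu;
}

int main (void)
{
  for (uint32_t x = 0; x < 3000000; x++)
    if (divisible_by_6 (x) != (x % 6 == 0))
      {
        printf ("mismatch at %u\n", x);
        return 1;
      }
  puts ("ok");
  return 0;
}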

/* Generate code to calculate OPS, and exploded expression
   using a store-flag instruction and return an rtx for the result.
   OPS reflects a comparison.

@@ -11548,7 +11785,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)

  STRIP_NOPS (arg0);
  STRIP_NOPS (arg1);

  /* For vector typed comparisons emit code to generate the desired
     all-ones or all-zeros mask.  Conveniently use the VEC_COND_EXPR
     expander for this.  */

@@ -11567,6 +11804,24 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
	}
    }

  /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial
     into (x - C2) * C3 < C4.  */
  if ((ops->code == EQ_EXPR || ops->code == NE_EXPR)
      && TREE_CODE (arg0) == SSA_NAME
      && TREE_CODE (arg1) == INTEGER_CST)
    {
      enum tree_code code = maybe_optimize_mod_cmp (ops->code, &arg0, &arg1);
      if (code != ops->code)
	{
	  struct separate_ops nops = *ops;
	  nops.code = ops->code = code;
	  nops.op0 = arg0;
	  nops.op1 = arg1;
	  nops.type = TREE_TYPE (arg0);
	  return do_store_flag (&nops, target, mode);
	}
    }

  /* Get the rtx comparison code to use.  We know that EXP is a comparison
     operation of some type.  Some comparisons against 1 and -1 can be
     converted to comparisons with zero.  Do so here so that the tests
gcc/expr.h
@@ -290,6 +290,8 @@ expand_normal (tree exp)
   a string constant.  */
extern tree string_constant (tree, tree *, tree *, tree *);

extern enum tree_code maybe_optimize_mod_cmp (enum tree_code, tree *, tree *);

/* Two different ways of generating switch statements.  */
extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx, profile_probability);
extern int try_tablejump (tree, tree, tree, tree, rtx, rtx, profile_probability);
gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
2018-09-12  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/82853
	* gcc.target/i386/pr82853-1.c: New test.
	* gcc.target/i386/pr82853-2.c: New test.

2018-09-12  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/87280
gcc/testsuite/gcc.target/i386/pr82853-1.c (new file, +15 lines)
@@ -0,0 +1,15 @@
/* PR middle-end/82853 */
/* { dg-do compile } */
/* { dg-options "-O2 -mno-bmi2 -mtune=generic" } */
/* { dg-final { scan-assembler-times "mul\[lq]\t" 7 } } */
/* { dg-final { scan-assembler-not "div\[lq]\t" } } */
/* { dg-final { scan-assembler-not "lea\[lq]\t\[^\n\r]*,\[^\n\r]*,2\\)" } } */

unsigned f1 (unsigned x) { return (x % 679U) == 0; }
unsigned f2 (unsigned x) { return (x % 1738U) == 0; }
void bar (void);
void f3 (unsigned x) { if (x % 3 == 0) bar (); }
void f4 (unsigned x) { if (x % 3 == 1) bar (); }
void f5 (unsigned x) { if (x % 3 == 2) bar (); }
int f6 (int x) { return x % 3 == 0; }
int f7 (int x) { return x % 6 == 0; }
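f6 above exercises the signed odd-divisor form. A hedged standalone check of that case (not GCC code; the constants are illustrative, derived as described in the expr.c comment: C3 is the inverse of 3 mod 2^32, C4 is 0x7FFFFFFF / 3):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the signed odd-divisor form for int x % 3 == 0,
   all arithmetic done unsigned.  The threshold is 2 * C4 = 0x55555554.  */
static int
divisible_by_3_signed (int32_t x)
{
  return (uint32_t) x * 0xAAAAAAABu + 0x2AAAAAAAu <= 0x55555554u;
}

int main (void)
{
  for (int32_t x = -3000000; x < 3000000; x++)
    if (divisible_by_3_signed (x) != (x % 3 == 0))
      {
        printf ("mismatch at %d\n", x);
        return 1;
      }
  puts ("ok");
  return 0;
}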
gcc/testsuite/gcc.target/i386/pr82853-2.c (new file, +7 lines)
@@ -0,0 +1,7 @@
/* PR middle-end/82853 */
/* { dg-do compile } */
/* { dg-options "-O2 -mno-bmi2 -mtune=generic" } */
/* { dg-final { scan-assembler-times "mul\[lq]\t" 2 } } */
/* { dg-final { scan-assembler-not "div\[lq]\t" } } */

unsigned f1 (unsigned x, unsigned *y) { *y = x / 679U; return (x % 679U) == 0; }