Use nonzero bits to refine range in split_constant_offset (PR 81635)
This patch is part 2 of the fix for PR 81635.  It means that
split_constant_offset can handle loops like:

  for (unsigned int i = 0; i < n; i += 4)
    {
      a[i] = ...;
      a[i + 1] = ...;
    }

CCP records that "i" must have its low 2 bits clear, but we don't
include this information in the range of "i", which remains [0, +INF].

I tried making set_nonzero_bits update the range info in the same way
that set_range_info updates the nonzero bits, but it regressed cases
like vrp117.c and made some other tests worse.

vrp117.c has a multiplication by 10, so CCP can infer that the low bit
of the result is clear.  If we included that in the range, the range
would go from [-INF, +INF] to [-INF, not-quite-+INF].  However, the
multiplication is also known to overflow in all cases, so VRP saturates
the result to [INT_MAX, INT_MAX].  This obviously creates a
contradiction with the nonzero bits, and intersecting the new saturated
range with an existing not-quite-+INF range would make us drop to
VR_UNDEFINED.  We're prepared to fold a comparison with an
[INT_MAX, INT_MAX] value but not with a VR_UNDEFINED value.

The other problems were created when intersecting [-INF, not-quite-+INF]
with a useful VR_ANTI_RANGE like ~[-1, 1].  The intersection would keep
the former range rather than the latter.

The patch therefore keeps the adjustment local to split_constant_offset
for now, but adds a helper routine so that it's easy to move this later.

2018-02-08  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	PR tree-optimization/81635
	* wide-int.h (wi::round_down_for_mask, wi::round_up_for_mask): Declare.
	* wide-int.cc (wi::round_down_for_mask, wi::round_up_for_mask)
	(test_round_for_mask): New functions.
	(wide_int_cc_tests): Call test_round_for_mask.
	* tree-vrp.h (intersect_range_with_nonzero_bits): Declare.
	* tree-vrp.c (intersect_range_with_nonzero_bits): New function.
	* tree-data-ref.c (split_constant_offset_1): Use it to refine the
	range returned by get_range_info.

gcc/testsuite/
	PR tree-optimization/81635
	* gcc.dg/vect/bb-slp-pr81635-3.c: New test.
	* gcc.dg/vect/bb-slp-pr81635-4.c: Likewise.

From-SVN: r257491
parent 39aa9b2369
commit fff2290073
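As a quick illustration of what the refinement buys for the loop in the
message above: stepping "i" by 4 from 0 means its low two bits are always
clear, and intersecting the range [0, +INF] with that nonzero-bits mask
pulls the maximum down to the largest mask-compatible value, so offsets
like "i + 1" provably cannot wrap.  A minimal standalone sketch of the
arithmetic, using plain uint32_t rather than GCC's wide_int (illustrative
only, not code from this commit):

  #include <cstdint>
  #include <cstdio>

  int main ()
  {
    uint32_t nonzero_bits = ~3u;	/* low two bits known zero (i += 4) */
    uint32_t max = UINT32_MAX;		/* old range: [0, +INF] */
    /* For this contiguous mask, rounding down is just a bitwise AND.  */
    uint32_t refined_max = max & nonzero_bits;
    printf ("refined range: [0, 0x%08x]\n", refined_max); /* 0xfffffffc */
    return 0;
  }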
gcc/ChangeLog
@@ -1,3 +1,15 @@
+2018-02-08  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	PR tree-optimization/81635
+	* wide-int.h (wi::round_down_for_mask, wi::round_up_for_mask): Declare.
+	* wide-int.cc (wi::round_down_for_mask, wi::round_up_for_mask)
+	(test_round_for_mask): New functions.
+	(wide_int_cc_tests): Call test_round_for_mask.
+	* tree-vrp.h (intersect_range_with_nonzero_bits): Declare.
+	* tree-vrp.c (intersect_range_with_nonzero_bits): New function.
+	* tree-data-ref.c (split_constant_offset_1): Use it to refine the
+	range returned by get_range_info.
+
 2018-02-08  Jan Hubicka  <hubicka@ucw.cz>
 
 	PR ipa/81360
gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2018-02-08  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	PR tree-optimization/81635
+	* gcc.dg/vect/bb-slp-pr81635-3.c: New test.
+	* gcc.dg/vect/bb-slp-pr81635-4.c: Likewise.
+
 2018-02-08  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
 	* gcc.target/s390/nobp-function-pointer-attr.c: New test.
gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-3.c (new file, 62 lines)
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target lp64 } */
+
+void
+f1 (double *p, double *q, unsigned int n)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 0; i < n; i += 4)
+    {
+      double a = q[i] + p[i];
+      double b = q[i + 1] + p[i + 1];
+      q[i] = a;
+      q[i + 1] = b;
+    }
+}
+
+void
+f2 (double *p, double *q, unsigned int n)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 0; i < n; i += 2)
+    {
+      double a = q[i] + p[i];
+      double b = q[i + 1] + p[i + 1];
+      q[i] = a;
+      q[i + 1] = b;
+    }
+}
+
+void
+f3 (double *p, double *q, unsigned int n)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 0; i < n; i += 6)
+    {
+      double a = q[i] + p[i];
+      double b = q[i + 1] + p[i + 1];
+      q[i] = a;
+      q[i + 1] = b;
+    }
+}
+
+void
+f4 (double *p, double *q, unsigned int start, unsigned int n)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = start & -2; i < n; i += 2)
+    {
+      double a = q[i] + p[i];
+      double b = q[i + 1] + p[i + 1];
+      q[i] = a;
+      q[i + 1] = b;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 4 "slp1" } } */
gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-4.c (new file, 47 lines)
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+void
+f1 (double *p, double *q, unsigned int n)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 0; i < n; i += 1)
+    {
+      double a = q[i] + p[i];
+      double b = q[i + 1] + p[i + 1];
+      q[i] = a;
+      q[i + 1] = b;
+    }
+}
+
+void
+f2 (double *p, double *q, unsigned int n)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = 0; i < n; i += 3)
+    {
+      double a = q[i] + p[i];
+      double b = q[i + 1] + p[i + 1];
+      q[i] = a;
+      q[i + 1] = b;
+    }
+}
+
+void
+f3 (double *p, double *q, unsigned int start, unsigned int n)
+{
+  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+  for (unsigned int i = start; i < n; i += 2)
+    {
+      double a = q[i] + p[i];
+      double b = q[i + 1] + p[i + 1];
+      q[i] = a;
+      q[i + 1] = b;
+    }
+}
+
+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */
gcc/tree-data-ref.c
@@ -721,7 +721,13 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 	    if (TREE_CODE (tmp_var) != SSA_NAME)
 	      return false;
 	    wide_int var_min, var_max;
-	    if (get_range_info (tmp_var, &var_min, &var_max) != VR_RANGE)
+	    value_range_type vr_type = get_range_info (tmp_var, &var_min,
+						       &var_max);
+	    wide_int var_nonzero = get_nonzero_bits (tmp_var);
+	    signop sgn = TYPE_SIGN (itype);
+	    if (intersect_range_with_nonzero_bits (vr_type, &var_min,
+						   &var_max, var_nonzero,
+						   sgn) != VR_RANGE)
 	      return false;
 
 	    /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
@@ -729,7 +735,6 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 	       operations done in ITYPE.  The addition must overflow
 	       at both ends of the range or at neither.  */
 	    bool overflow[2];
-	    signop sgn = TYPE_SIGN (itype);
 	    unsigned int prec = TYPE_PRECISION (itype);
 	    wide_int woff = wi::to_wide (tmp_off, prec);
 	    wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
gcc/tree-vrp.c
@@ -171,6 +171,53 @@ vrp_val_is_min (const_tree val)
 	      && operand_equal_p (val, type_min, 0)));
 }
 
+/* VR_TYPE describes a range with minimum value *MIN and maximum
+   value *MAX.  Restrict the range to the set of values that have
+   no bits set outside NONZERO_BITS.  Update *MIN and *MAX and
+   return the new range type.
+
+   SGN gives the sign of the values described by the range.  */
+
+enum value_range_type
+intersect_range_with_nonzero_bits (enum value_range_type vr_type,
+				   wide_int *min, wide_int *max,
+				   const wide_int &nonzero_bits,
+				   signop sgn)
+{
+  if (vr_type == VR_RANGE)
+    {
+      *max = wi::round_down_for_mask (*max, nonzero_bits);
+
+      /* Check that the range contains at least one valid value.  */
+      if (wi::gt_p (*min, *max, sgn))
+	return VR_UNDEFINED;
+
+      *min = wi::round_up_for_mask (*min, nonzero_bits);
+      gcc_checking_assert (wi::le_p (*min, *max, sgn));
+    }
+  if (vr_type == VR_ANTI_RANGE)
+    {
+      *max = wi::round_up_for_mask (*max, nonzero_bits);
+
+      /* If the calculation wrapped, we now have a VR_RANGE whose
+	 lower bound is *MAX and whose upper bound is *MIN.  */
+      if (wi::gt_p (*min, *max, sgn))
+	{
+	  std::swap (*min, *max);
+	  *max = wi::round_down_for_mask (*max, nonzero_bits);
+	  gcc_checking_assert (wi::le_p (*min, *max, sgn));
+	  return VR_RANGE;
+	}
+
+      *min = wi::round_down_for_mask (*min, nonzero_bits);
+      gcc_checking_assert (wi::le_p (*min, *max, sgn));
+
+      /* Check whether we now have an empty set of values.  */
+      if (*min - 1 == *max)
+	return VR_UNDEFINED;
+    }
+  return vr_type;
+}
+
 /* Set value range VR to VR_UNDEFINED.  */
 
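The VR_RANGE arm is easiest to picture with concrete numbers: the minimum
rounds up and the maximum rounds down to the nearest mask-compatible
values.  A standalone sketch of the same logic on plain uint32_t (the
helper names mirror the wi:: routines but this is illustrative code, not
GCC's wide_int API; __builtin_clz assumes GCC/Clang):

  #include <cassert>
  #include <cstdint>

  /* Round VAL down to the previous value with no bits outside MASK.  */
  static uint32_t
  round_down_for_mask (uint32_t val, uint32_t mask)
  {
    uint32_t extra = val & ~mask;	/* bits of VAL outside MASK */
    if (extra == 0)
      return val;
    /* All 1s at and below the highest "extra" bit.  */
    uint32_t top = 0x80000000u >> __builtin_clz (extra);
    uint32_t lower = top | (top - 1);
    return (val & mask) | (mask & lower);
  }

  /* Round VAL up to the next value with no bits outside MASK.  */
  static uint32_t
  round_up_for_mask (uint32_t val, uint32_t mask)
  {
    uint32_t extra = val & ~mask;
    if (extra == 0)
      return val;
    uint32_t top = 0x80000000u >> __builtin_clz (extra);
    uint32_t upper = ~(top | (top - 1)) & mask;	/* mask bits above "extra" */
    uint32_t tmp = upper & ~val;	/* lowest bit where a carry can stop */
    return (val | tmp) & -tmp;
  }

  int main ()
  {
    /* VR_RANGE branch: [3, 14] restricted to values under mask 0b1100
       becomes [4, 12] -- min rounds up, max rounds down.  */
    uint32_t min = 3, max = 14, nonzero_bits = 0xc;
    max = round_down_for_mask (max, nonzero_bits);
    assert (min <= max);		/* otherwise VR_UNDEFINED */
    min = round_up_for_mask (min, nonzero_bits);
    assert (min == 4 && max == 12);
    return 0;
  }

The VR_ANTI_RANGE arm is the mirror image: the excluded window grows
outward, and if the rounding wraps past itself the complement collapses
back to an ordinary VR_RANGE.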
gcc/tree-vrp.h
@@ -61,6 +61,8 @@ extern void extract_range_from_unary_expr (value_range *vr,
 					   tree op0_type);
 
 extern bool vrp_operand_equal_p (const_tree, const_tree);
+extern enum value_range_type intersect_range_with_nonzero_bits
+  (enum value_range_type, wide_int *, wide_int *, const wide_int &, signop);
 
 struct assert_info
 {
gcc/wide-int.cc (112 lines changed)
@@ -2132,6 +2132,70 @@ wi::only_sign_bit_p (const wide_int_ref &x)
   return only_sign_bit_p (x, x.precision);
 }
 
+/* Return VAL if VAL has no bits set outside MASK.  Otherwise round VAL
+   down to the previous value that has no bits set outside MASK.
+   This rounding wraps for signed values if VAL is negative and
+   the top bit of MASK is clear.
+
+   For example, round_down_for_mask (6, 0xf1) would give 1 and
+   round_down_for_mask (24, 0xf1) would give 17.  */
+
+wide_int
+wi::round_down_for_mask (const wide_int &val, const wide_int &mask)
+{
+  /* Get the bits in VAL that are outside the mask.  */
+  wide_int extra_bits = wi::bit_and_not (val, mask);
+  if (extra_bits == 0)
+    return val;
+
+  /* Get a mask that includes the top bit in EXTRA_BITS and is all 1s
+     below that bit.  */
+  unsigned int precision = val.get_precision ();
+  wide_int lower_mask = wi::mask (precision - wi::clz (extra_bits),
+				  false, precision);
+
+  /* Clear the bits that aren't in MASK, but ensure that all bits
+     in MASK below the top cleared bit are set.  */
+  return (val & mask) | (mask & lower_mask);
+}
+
+/* Return VAL if VAL has no bits set outside MASK.  Otherwise round VAL
+   up to the next value that has no bits set outside MASK.  The rounding
+   wraps if there are no suitable values greater than VAL.
+
+   For example, round_up_for_mask (6, 0xf1) would give 16 and
+   round_up_for_mask (24, 0xf1) would give 32.  */
+
+wide_int
+wi::round_up_for_mask (const wide_int &val, const wide_int &mask)
+{
+  /* Get the bits in VAL that are outside the mask.  */
+  wide_int extra_bits = wi::bit_and_not (val, mask);
+  if (extra_bits == 0)
+    return val;
+
+  /* Get a mask that is all 1s above the top bit in EXTRA_BITS.  */
+  unsigned int precision = val.get_precision ();
+  wide_int upper_mask = wi::mask (precision - wi::clz (extra_bits),
+				  true, precision);
+
+  /* Get the bits of the mask that are above the top bit in EXTRA_BITS.  */
+  upper_mask &= mask;
+
+  /* Conceptually we need to:
+
+     - clear bits of VAL outside UPPER_MASK
+     - add the lowest bit in UPPER_MASK to VAL (or add 0 if UPPER_MASK is 0)
+     - propagate the carry through the bits of VAL in UPPER_MASK
+
+     If (~VAL & UPPER_MASK) is nonzero, the carry eventually
+     reaches that bit and the process leaves all lower bits clear.
+     If (~VAL & UPPER_MASK) is zero then the result is also zero.  */
+  wide_int tmp = wi::bit_and_not (upper_mask, val);
+
+  return (val | tmp) & -tmp;
+}
+
 /*
  * Private utilities.
  */
@@ -2384,6 +2448,53 @@ test_overflow ()
     }
 }
 
+/* Test the round_{down,up}_for_mask functions.  */
+
+static void
+test_round_for_mask ()
+{
+  unsigned int prec = 18;
+  ASSERT_EQ (17, wi::round_down_for_mask (wi::shwi (17, prec),
+					  wi::shwi (0xf1, prec)));
+  ASSERT_EQ (17, wi::round_up_for_mask (wi::shwi (17, prec),
+					wi::shwi (0xf1, prec)));
+
+  ASSERT_EQ (1, wi::round_down_for_mask (wi::shwi (6, prec),
+					 wi::shwi (0xf1, prec)));
+  ASSERT_EQ (16, wi::round_up_for_mask (wi::shwi (6, prec),
+					wi::shwi (0xf1, prec)));
+
+  ASSERT_EQ (17, wi::round_down_for_mask (wi::shwi (24, prec),
+					  wi::shwi (0xf1, prec)));
+  ASSERT_EQ (32, wi::round_up_for_mask (wi::shwi (24, prec),
+					wi::shwi (0xf1, prec)));
+
+  ASSERT_EQ (0x011, wi::round_down_for_mask (wi::shwi (0x22, prec),
+					     wi::shwi (0x111, prec)));
+  ASSERT_EQ (0x100, wi::round_up_for_mask (wi::shwi (0x22, prec),
+					   wi::shwi (0x111, prec)));
+
+  ASSERT_EQ (100, wi::round_down_for_mask (wi::shwi (101, prec),
+					   wi::shwi (0xfc, prec)));
+  ASSERT_EQ (104, wi::round_up_for_mask (wi::shwi (101, prec),
+					 wi::shwi (0xfc, prec)));
+
+  ASSERT_EQ (0x2bc, wi::round_down_for_mask (wi::shwi (0x2c2, prec),
+					     wi::shwi (0xabc, prec)));
+  ASSERT_EQ (0x800, wi::round_up_for_mask (wi::shwi (0x2c2, prec),
+					   wi::shwi (0xabc, prec)));
+
+  ASSERT_EQ (0xabc, wi::round_down_for_mask (wi::shwi (0xabd, prec),
+					     wi::shwi (0xabc, prec)));
+  ASSERT_EQ (0, wi::round_up_for_mask (wi::shwi (0xabd, prec),
+				       wi::shwi (0xabc, prec)));
+
+  ASSERT_EQ (0xabc, wi::round_down_for_mask (wi::shwi (0x1000, prec),
+					     wi::shwi (0xabc, prec)));
+  ASSERT_EQ (0, wi::round_up_for_mask (wi::shwi (0x1000, prec),
+				       wi::shwi (0xabc, prec)));
+}
+
 /* Run all of the selftests within this file, for all value types.  */
 
 void
@@ -2393,6 +2504,7 @@ wide_int_cc_tests ()
   run_all_wide_int_tests <offset_int> ();
   run_all_wide_int_tests <widest_int> ();
   test_overflow ();
+  test_round_for_mask ();
 }
 
 } // namespace selftest
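One of those selftest values shows the carry trick in round_up_for_mask
nicely.  A standalone uint32_t replay (illustrative only; the real code
operates on wide_int at arbitrary precision, here 18 bits):

  #include <cassert>
  #include <cstdint>

  int main ()
  {
    /* Replay round_up_for_mask (0x2c2, 0xabc) == 0x800 from the tests.  */
    uint32_t val = 0x2c2, mask = 0xabc;
    uint32_t extra = val & ~mask;	/* 0x042: bits outside MASK */
    uint32_t top = 0x80000000u >> __builtin_clz (extra);	/* 0x040 */
    uint32_t upper = ~(top | (top - 1)) & mask;	/* 0xa80: mask bits above */
    uint32_t tmp = upper & ~val;	/* 0x800: where the carry stops */
    assert (((val | tmp) & -tmp) == 0x800);
    return 0;
  }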
gcc/wide-int.h
@@ -3308,6 +3308,8 @@ namespace wi
   wide_int set_bit_in_zero (unsigned int, unsigned int);
   wide_int insert (const wide_int &x, const wide_int &y, unsigned int,
 		   unsigned int);
+  wide_int round_down_for_mask (const wide_int &, const wide_int &);
+  wide_int round_up_for_mask (const wide_int &, const wide_int &);
 
   template <typename T>
   T mask (unsigned int, bool);