Use nonzero bits to refine range in split_constant_offset (PR 81635)

This patch is part 2 of the fix for PR 81635.  It means that
split_constant_offset can handle loops like:

  for (unsigned int i = 0; i < n; i += 4)
    {
      a[i] = ...;
      a[i + 1] = ...;
    }

CCP records that "i" must have its low 2 bits clear, but we don't
include this information in the range of "i", which remains [0, +INF].
I tried making set_nonzero_bits update the range info in the same
way that set_range_info updates the nonzero bits, but it regressed
cases like vrp117.c and made some other tests worse.

vrp117.c has a multiplication by 10, so CCP can infer that the low bit
of the result is clear.  If we included that in the range, the range
would go from [-INF, +INF] to [-INF, not-quite-+INF].  However,
the multiplication is also known to overflow in all cases, so VRP
saturates the result to [INT_MAX, INT_MAX].  This obviously creates a
contradiction with the nonzero bits, and intersecting the new saturated
range with an existing not-quite-+INF range would make us drop to
VR_UNDEFINED.  We're prepared to fold a comparison with an [INT_MAX,
INT_MAX] value but not with a VR_UNDEFINED value.

The other problems were created when intersecting [-INF, not-quite-+INF]
with a useful VR_ANTI_RANGE like ~[-1, 1].  The intersection would
keep the former range rather than the latter.

The patch therefore keeps the adjustment local to split_constant_offset
for now, but adds a helper routine so that it's easy to move this later.

2018-02-08  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	PR tree-optimization/81635
	* wide-int.h (wi::round_down_for_mask, wi::round_up_for_mask): Declare.
	* wide-int.cc (wi::round_down_for_mask, wi::round_up_for_mask)
	(test_round_for_mask): New functions.
	(wide_int_cc_tests): Call test_round_for_mask.
	* tree-vrp.h (intersect_range_with_nonzero_bits): Declare.
	* tree-vrp.c (intersect_range_with_nonzero_bits): New function.
	* tree-data-ref.c (split_constant_offset_1): Use it to refine the
	range returned by get_range_info.

gcc/testsuite/
	PR tree-optimization/81635
	* gcc.dg/vect/bb-slp-pr81635-3.c: New test.
	* gcc.dg/vect/bb-slp-pr81635-4.c: Likewise.

From-SVN: r257491
This commit is contained in:
Richard Sandiford 2018-02-08 15:16:29 +00:00 committed by Richard Sandiford
parent 39aa9b2369
commit fff2290073
9 changed files with 297 additions and 2 deletions

View File

@ -1,3 +1,15 @@
2018-02-08 Richard Sandiford <richard.sandiford@linaro.org>
PR tree-optimization/81635
* wide-int.h (wi::round_down_for_mask, wi::round_up_for_mask): Declare.
* wide-int.cc (wi::round_down_for_mask, wi::round_up_for_mask)
(test_round_for_mask): New functions.
(wide_int_cc_tests): Call test_round_for_mask.
* tree-vrp.h (intersect_range_with_nonzero_bits): Declare.
* tree-vrp.c (intersect_range_with_nonzero_bits): New function.
* tree-data-ref.c (split_constant_offset_1): Use it to refine the
range returned by get_range_info.
2018-02-08 Jan Hubicka <hubicka@ucw.cz>
PR ipa/81360

View File

@ -1,3 +1,9 @@
2018-02-08 Richard Sandiford <richard.sandiford@linaro.org>
PR tree-optimization/81635
* gcc.dg/vect/bb-slp-pr81635-3.c: New test.
* gcc.dg/vect/bb-slp-pr81635-4.c: Likewise.
2018-02-08 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
* gcc.target/s390/nobp-function-pointer-attr.c: New test.

View File

@ -0,0 +1,62 @@
/* { dg-do compile } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
/* { dg-require-effective-target vect_double } */
/* { dg-require-effective-target lp64 } */
/* Step of 4 keeps the low two bits of "i" clear, so q[i]/q[i + 1] form
   pairs compatible with the asserted 2*sizeof(double) alignment; the
   dg-final check below expects this block to be SLP vectorized.  */
void
f1 (double *p, double *q, unsigned int n)
{
p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
for (unsigned int i = 0; i < n; i += 4)
{
double a = q[i] + p[i];
double b = q[i + 1] + p[i + 1];
q[i] = a;
q[i + 1] = b;
}
}
/* Step of 2 keeps the low bit of "i" clear, so each q[i]/q[i + 1] pair
   is aligned; expected to be SLP vectorized (see dg-final below).  */
void
f2 (double *p, double *q, unsigned int n)
{
p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
for (unsigned int i = 0; i < n; i += 2)
{
double a = q[i] + p[i];
double b = q[i + 1] + p[i + 1];
q[i] = a;
q[i + 1] = b;
}
}
/* Step of 6 is even, so the low bit of "i" stays clear even though
   "i" is not a multiple of 4; expected to be SLP vectorized.  */
void
f3 (double *p, double *q, unsigned int n)
{
p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
for (unsigned int i = 0; i < n; i += 6)
{
double a = q[i] + p[i];
double b = q[i + 1] + p[i + 1];
q[i] = a;
q[i + 1] = b;
}
}
/* "start & -2" clears the low bit of the initial value and the step
   of 2 preserves that, so "i" is always even; expected to be SLP
   vectorized.  */
void
f4 (double *p, double *q, unsigned int start, unsigned int n)
{
p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
for (unsigned int i = start & -2; i < n; i += 2)
{
double a = q[i] + p[i];
double b = q[i + 1] + p[i + 1];
q[i] = a;
q[i + 1] = b;
}
}
/* { dg-final { scan-tree-dump-times "basic block vectorized" 4 "slp1" } } */

View File

@ -0,0 +1,47 @@
/* { dg-do compile } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
/* { dg-require-effective-target lp64 } */
/* Step of 1 gives no information about the low bit of "i", so the
   alignment of the q[i]/q[i + 1] pairs is unknown; the dg-final check
   below expects NO block in this file to be vectorized.  */
void
f1 (double *p, double *q, unsigned int n)
{
p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
for (unsigned int i = 0; i < n; i += 1)
{
double a = q[i] + p[i];
double b = q[i + 1] + p[i + 1];
q[i] = a;
q[i + 1] = b;
}
}
/* Step of 3 is odd, so the parity of "i" alternates and no nonzero-bits
   information is available; expected NOT to be vectorized.  */
void
f2 (double *p, double *q, unsigned int n)
{
p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
for (unsigned int i = 0; i < n; i += 3)
{
double a = q[i] + p[i];
double b = q[i + 1] + p[i + 1];
q[i] = a;
q[i + 1] = b;
}
}
/* Unlike f4 in bb-slp-pr81635-3.c, "start" is not masked, so the low
   bit of "i" is unknown despite the even step; expected NOT to be
   vectorized.  */
void
f3 (double *p, double *q, unsigned int start, unsigned int n)
{
p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
for (unsigned int i = start; i < n; i += 2)
{
double a = q[i] + p[i];
double b = q[i + 1] + p[i + 1];
q[i] = a;
q[i + 1] = b;
}
}
/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */

View File

@ -721,7 +721,13 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
if (TREE_CODE (tmp_var) != SSA_NAME)
return false;
wide_int var_min, var_max;
if (get_range_info (tmp_var, &var_min, &var_max) != VR_RANGE)
value_range_type vr_type = get_range_info (tmp_var, &var_min,
&var_max);
wide_int var_nonzero = get_nonzero_bits (tmp_var);
signop sgn = TYPE_SIGN (itype);
if (intersect_range_with_nonzero_bits (vr_type, &var_min,
&var_max, var_nonzero,
sgn) != VR_RANGE)
return false;
/* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
@ -729,7 +735,6 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
operations done in ITYPE. The addition must overflow
at both ends of the range or at neither. */
bool overflow[2];
signop sgn = TYPE_SIGN (itype);
unsigned int prec = TYPE_PRECISION (itype);
wide_int woff = wi::to_wide (tmp_off, prec);
wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);

View File

@ -171,6 +171,53 @@ vrp_val_is_min (const_tree val)
&& operand_equal_p (val, type_min, 0)));
}
/* VR_TYPE describes a range with minimum value *MIN and maximum
value *MAX. Restrict the range to the set of values that have
no bits set outside NONZERO_BITS. Update *MIN and *MAX and
return the new range type.
SGN gives the sign of the values described by the range. */
enum value_range_type
intersect_range_with_nonzero_bits (enum value_range_type vr_type,
wide_int *min, wide_int *max,
const wide_int &nonzero_bits,
signop sgn)
{
if (vr_type == VR_RANGE)
{
/* Tighten both bounds inward to the nearest values that are
representable under NONZERO_BITS. */
*max = wi::round_down_for_mask (*max, nonzero_bits);
/* Check that the range contains at least one valid value. */
if (wi::gt_p (*min, *max, sgn))
return VR_UNDEFINED;
*min = wi::round_up_for_mask (*min, nonzero_bits);
gcc_checking_assert (wi::le_p (*min, *max, sgn));
}
if (vr_type == VR_ANTI_RANGE)
{
/* For an anti-range we can grow the excluded hole outward:
values between the old and new bounds have bits outside
NONZERO_BITS and so are excluded anyway. */
*max = wi::round_up_for_mask (*max, nonzero_bits);
/* If the calculation wrapped, we now have a VR_RANGE whose
lower bound is *MAX and whose upper bound is *MIN. */
if (wi::gt_p (*min, *max, sgn))
{
std::swap (*min, *max);
*max = wi::round_down_for_mask (*max, nonzero_bits);
gcc_checking_assert (wi::le_p (*min, *max, sgn));
return VR_RANGE;
}
*min = wi::round_down_for_mask (*min, nonzero_bits);
gcc_checking_assert (wi::le_p (*min, *max, sgn));
/* Check whether we now have an empty set of values.
(The rounded bounds being adjacent after wrapping means the
hole covers every representable value.) */
if (*min - 1 == *max)
return VR_UNDEFINED;
}
return vr_type;
}
/* Set value range VR to VR_UNDEFINED. */

View File

@ -61,6 +61,8 @@ extern void extract_range_from_unary_expr (value_range *vr,
tree op0_type);
extern bool vrp_operand_equal_p (const_tree, const_tree);
extern enum value_range_type intersect_range_with_nonzero_bits
(enum value_range_type, wide_int *, wide_int *, const wide_int &, signop);
struct assert_info
{

View File

@ -2132,6 +2132,70 @@ wi::only_sign_bit_p (const wide_int_ref &x)
return only_sign_bit_p (x, x.precision);
}
/* Return VAL if VAL has no bits set outside MASK.  Otherwise round VAL
   down to the previous value that has no bits set outside MASK.
   This rounding wraps for signed values if VAL is negative and
   the top bit of MASK is clear.

   For example, round_down_for_mask (6, 0xf1) would give 1 and
   round_down_for_mask (24, 0xf1) would give 17.  */

wide_int
wi::round_down_for_mask (const wide_int &val, const wide_int &mask)
{
  /* Find the bits of VAL that stray outside MASK; if there are none,
     VAL already belongs to the set described by MASK.  */
  wide_int stray = wi::bit_and_not (val, mask);
  if (stray == 0)
    return val;

  /* Build a mask covering the highest stray bit and every bit
     below it.  */
  unsigned int prec = val.get_precision ();
  wide_int below = wi::mask (prec - wi::clz (stray), false, prec);

  /* Keep VAL's in-mask bits and additionally set every MASK bit at or
     below the highest stray bit, giving the largest in-mask value that
     does not exceed VAL (modulo the documented wrap).  */
  return (val & mask) | (mask & below);
}
/* Return VAL if VAL has no bits set outside MASK.  Otherwise round VAL
   up to the next value that has no bits set outside MASK.  The rounding
   wraps if there are no suitable values greater than VAL.

   For example, round_up_for_mask (6, 0xf1) would give 16 and
   round_up_for_mask (24, 0xf1) would give 32.  */

wide_int
wi::round_up_for_mask (const wide_int &val, const wide_int &mask)
{
  /* Find the bits of VAL that stray outside MASK; if there are none,
     VAL already belongs to the set described by MASK.  */
  wide_int stray = wi::bit_and_not (val, mask);
  if (stray == 0)
    return val;

  /* Compute the MASK bits that lie strictly above the highest
     stray bit.  */
  unsigned int prec = val.get_precision ();
  wide_int above = wi::mask (prec - wi::clz (stray), true, prec);
  above &= mask;

  /* Conceptually we need to:

     - clear bits of VAL outside ABOVE
     - add the lowest bit in ABOVE to VAL (or add 0 if ABOVE is 0)
     - propagate the carry through the bits of VAL in ABOVE

     If (~VAL & ABOVE) is nonzero, the carry eventually reaches that
     bit and the process leaves all lower bits clear.  If (~VAL & ABOVE)
     is zero then the result is also zero.  */
  wide_int flip = wi::bit_and_not (above, val);
  return (val | flip) & -flip;
}
/*
* Private utilities.
*/
@ -2384,6 +2448,53 @@ test_overflow ()
}
}
/* Test the round_{down,up}_for_mask functions. */
static void
test_round_for_mask ()
{
unsigned int prec = 18;
ASSERT_EQ (17, wi::round_down_for_mask (wi::shwi (17, prec),
wi::shwi (0xf1, prec)));
ASSERT_EQ (17, wi::round_up_for_mask (wi::shwi (17, prec),
wi::shwi (0xf1, prec)));
ASSERT_EQ (1, wi::round_down_for_mask (wi::shwi (6, prec),
wi::shwi (0xf1, prec)));
ASSERT_EQ (16, wi::round_up_for_mask (wi::shwi (6, prec),
wi::shwi (0xf1, prec)));
ASSERT_EQ (17, wi::round_down_for_mask (wi::shwi (24, prec),
wi::shwi (0xf1, prec)));
ASSERT_EQ (32, wi::round_up_for_mask (wi::shwi (24, prec),
wi::shwi (0xf1, prec)));
ASSERT_EQ (0x011, wi::round_down_for_mask (wi::shwi (0x22, prec),
wi::shwi (0x111, prec)));
ASSERT_EQ (0x100, wi::round_up_for_mask (wi::shwi (0x22, prec),
wi::shwi (0x111, prec)));
ASSERT_EQ (100, wi::round_down_for_mask (wi::shwi (101, prec),
wi::shwi (0xfc, prec)));
ASSERT_EQ (104, wi::round_up_for_mask (wi::shwi (101, prec),
wi::shwi (0xfc, prec)));
ASSERT_EQ (0x2bc, wi::round_down_for_mask (wi::shwi (0x2c2, prec),
wi::shwi (0xabc, prec)));
ASSERT_EQ (0x800, wi::round_up_for_mask (wi::shwi (0x2c2, prec),
wi::shwi (0xabc, prec)));
ASSERT_EQ (0xabc, wi::round_down_for_mask (wi::shwi (0xabd, prec),
wi::shwi (0xabc, prec)));
ASSERT_EQ (0, wi::round_up_for_mask (wi::shwi (0xabd, prec),
wi::shwi (0xabc, prec)));
ASSERT_EQ (0xabc, wi::round_down_for_mask (wi::shwi (0x1000, prec),
wi::shwi (0xabc, prec)));
ASSERT_EQ (0, wi::round_up_for_mask (wi::shwi (0x1000, prec),
wi::shwi (0xabc, prec)));
}
/* Run all of the selftests within this file, for all value types. */
void
@ -2393,6 +2504,7 @@ wide_int_cc_tests ()
run_all_wide_int_tests <offset_int> ();
run_all_wide_int_tests <widest_int> ();
test_overflow ();
test_round_for_mask ();
}
} // namespace selftest

View File

@ -3308,6 +3308,8 @@ namespace wi
wide_int set_bit_in_zero (unsigned int, unsigned int);
wide_int insert (const wide_int &x, const wide_int &y, unsigned int,
unsigned int);
wide_int round_down_for_mask (const wide_int &, const wide_int &);
wide_int round_up_for_mask (const wide_int &, const wide_int &);
template <typename T>
T mask (unsigned int, bool);