loop.c (strength_reduce): If scan_start points to the loop exit test...

* loop.c (strength_reduce): If scan_start points to the loop exit test, be wary of subversive use of gotos inside expression statements. Don't set maybe_multiple for a backward jump that does not include the label under consideration into its range.
* unroll.c (biv_total_increment): Make use of maybe_multiple field.

From-SVN: r24196
2025-01-31 14:30:10 +08:00 · 1998-12-08 14:50:03 +00:00 · 1998-12-08 14:50:03 +00:00 · 5353610bac
commit 5353610bac
parent 03d937fceb
4 changed files with 48 additions and 8 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+Tue Dec  8 22:47:15 1998  J"orn Rennecke <amylaar@cygnus.co.uk>
+
+	* loop.c (strength_reduce): If scan_start points to the loop exit
+	test, be wary of subversive use of gotos inside expression statements.
+	Don't set maybe_multiple for a backward jump that does not
+	include the label under consideration into its range.
+	* unroll.c (biv_total_increment): Make use of maybe_multiple field.
+
 Tue Dec  8 22:33:18 1998  J"orn Rennecke <amylaar@cygnus.co.uk>

 	* explow.c (plus_constant_wide): Don't immediately return with
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -2852,6 +2852,27 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
   This could optimize to a bfexts instruction.
   But C doesn't use these operations, so their optimizations are
   left for later.  */
+/* ??? For modulo, we don't actually need the highpart of the first product,
+   the low part will do nicely.  And for small divisors, the second multiply
+   can also be a low-part only multiply or even be completely left out.
+   E.g. to calculate the remainder of a division by 3 with a 32 bit
+   multiply, multiply with 0x55555556 and extract the upper two bits;
+   the result is exact for inputs up to 0x1fffffff.
+   The input range can be reduced by using cross-sum rules.
+   For odd divisors >= 3, the following table gives right shift counts
+   so that if a number is shifted by an integer multiple of the given
+   amount, the remainder stays the same:
+   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
+   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
+   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
+   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
+   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
+
+   Cross-sum rules for even numbers can be derived by leaving as many bits
+   to the right alone as the divisor has zeros to the right.
+   E.g. if x is an unsigned 32 bit number:
+   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
+   */

 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)

--- a/gcc/loop.c
+++ b/gcc/loop.c
@@ -3555,6 +3555,11 @@ strength_reduce (scan_start, end, loop_top, insn_count,
  struct loop_info loop_iteration_info;
  struct loop_info *loop_info = &loop_iteration_info;

+  /* If scan_start points to the loop exit test, we have to be wary of
+     subversive use of gotos inside expression statements.  */
+  if (prev_nonnote_insn (scan_start) != prev_nonnote_insn (loop_start))
+    maybe_multiple = back_branch_in_range_p (scan_start, loop_start, loop_end);
+
  reg_iv_type = (enum iv_mode *) alloca (max_reg_before_loop
 					 * sizeof (enum iv_mode));
  bzero ((char *) reg_iv_type, max_reg_before_loop * sizeof (enum iv_mode));
@@ -3618,8 +3623,8 @@ strength_reduce (scan_start, end, loop_top, insn_count,
      /* Past CODE_LABEL, we get to insns that may be executed multiple
 	 times.  The only way we can be sure that they can't is if every
 	 jump insn between here and the end of the loop either
-	 returns, exits the loop, is a forward jump, or is a jump
-	 to the loop start.  */
+	 returns, exits the loop, is a jump to a location that is still
+	 behind the label, or is a jump to the loop start.  */

      if (GET_CODE (p) == CODE_LABEL)
 	{
@@ -3648,9 +3653,12 @@ strength_reduce (scan_start, end, loop_top, insn_count,
 		      || (JUMP_LABEL (insn) != 0
 			  && JUMP_LABEL (insn) != scan_start
 			  && (INSN_UID (JUMP_LABEL (insn)) >= max_uid_for_loop
-			      || INSN_UID (insn) >= max_uid_for_loop
-			      || (INSN_LUID (JUMP_LABEL (insn))
-				  < INSN_LUID (insn))))))
+			      || (INSN_UID (p) < max_uid_for_loop
+				  ? (INSN_LUID (JUMP_LABEL (insn))
+				     <= INSN_LUID (p))
+				  : (INSN_UID (insn) >= max_uid_for_loop
+				     || (INSN_LUID (JUMP_LABEL (insn))
+					 < INSN_LUID (insn))))))))
 		{
 		  maybe_multiple = 1;
 		  break;
--- a/gcc/unroll.c
+++ b/gcc/unroll.c
@@ -1196,7 +1196,7 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
 	PATTERN (insn) = remap_split_bivs (PATTERN (insn));
    }

-  /* For unroll_number - 1 times, make a copy of each instruction
+  /* For unroll_number times, make a copy of each instruction
     between copy_start and copy_end, and insert these new instructions
     before the end of the loop.  */

@@ -1295,7 +1295,10 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
 /* ??? If the loop is known to be executed very many times, or the machine
   has a very cheap divide instruction, then preconditioning is a win even
   when the increment is not a power of 2.  Use RTX_COST to compute
-   whether divide is cheap.  */
+   whether divide is cheap.
+   ??? A divide by constant doesn't actually need a divide, look at
+   expand_divmod.  The reduced cost of this optimized modulo is not
+   reflected in RTX_COST.  */

 int
 precondition_loop_p (loop_start, loop_info,
@@ -2313,7 +2316,7 @@ biv_total_increment (bl, loop_start, loop_end)
  for (v = bl->biv; v; v = v->next_iv)
    {
      if (v->always_computable && v->mult_val == const1_rtx
-	  && ! back_branch_in_range_p (v->insn, loop_start, loop_end))
+	  && ! v->maybe_multiple)
 	result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
      else
 	return 0;