mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-01-31 14:30:10 +08:00
loop.c (strength_reduce): If scan_start points to the loop exit test...
* loop.c (strength_reduce): If scan_start points to the loop exit test, be wary of subversive use of gotos inside expression statements. Don't set maybe_multiple for a backward jump that does not include the label under consideration into its range. * unroll.c (biv_total_increment): Make use of maybe_multiple field. From-SVN: r24196
This commit is contained in:
parent
03d937fceb
commit
5353610bac
@ -1,3 +1,11 @@
|
||||
Tue Dec 8 22:47:15 1998 J"orn Rennecke <amylaar@cygnus.co.uk>
|
||||
|
||||
* loop.c (strength_reduce): If scan_start points to the loop exit
|
||||
test, be wary of subversive use of gotos inside expression statements.
|
||||
Don't set maybe_multiple for a backward jump that does not
|
||||
include the label under consideration into its range.
|
||||
* unroll.c (biv_total_increment): Make use of maybe_multiple field.
|
||||
|
||||
Tue Dec 8 22:33:18 1998 J"orn Rennecke <amylaar@cygnus.co.uk>
|
||||
|
||||
* explow.c (plus_constant_wide): Don't immediately return with
|
||||
|
21
gcc/expmed.c
21
gcc/expmed.c
@ -2852,6 +2852,27 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
|
||||
This could optimize to a bfexts instruction.
|
||||
But C doesn't use these operations, so their optimizations are
|
||||
left for later. */
|
||||
/* ??? For modulo, we don't actually need the highpart of the first product,
|
||||
the low part will do nicely. And for small divisors, the second multiply
|
||||
can also be a low-part only multiply or even be completely left out.
|
||||
E.g. to calculate the remainder of a division by 3 with a 32 bit
|
||||
multiply, multiply with 0x55555556 and extract the upper two bits;
|
||||
the result is exact for inputs up to 0x1fffffff.
|
||||
The input range can be reduced by using cross-sum rules.
|
||||
For odd divisors >= 3, the following table gives right shift counts
|
||||
so that if a number is shifted by an integer multiple of the given
|
||||
amount, the remainder stays the same:
|
||||
2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
|
||||
14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
|
||||
0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
|
||||
20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
|
||||
0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
|
||||
|
||||
Cross-sum rules for even numbers can be derived by leaving as many bits
|
||||
to the right alone as the divisor has zeros to the right.
|
||||
E.g. if x is an unsigned 32 bit number:
|
||||
(x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
|
||||
*/
|
||||
|
||||
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
|
||||
|
||||
|
18
gcc/loop.c
18
gcc/loop.c
@ -3555,6 +3555,11 @@ strength_reduce (scan_start, end, loop_top, insn_count,
|
||||
struct loop_info loop_iteration_info;
|
||||
struct loop_info *loop_info = &loop_iteration_info;
|
||||
|
||||
/* If scan_start points to the loop exit test, we have to be wary of
|
||||
subversive use of gotos inside expression statements. */
|
||||
if (prev_nonnote_insn (scan_start) != prev_nonnote_insn (loop_start))
|
||||
maybe_multiple = back_branch_in_range_p (scan_start, loop_start, loop_end);
|
||||
|
||||
reg_iv_type = (enum iv_mode *) alloca (max_reg_before_loop
|
||||
* sizeof (enum iv_mode));
|
||||
bzero ((char *) reg_iv_type, max_reg_before_loop * sizeof (enum iv_mode));
|
||||
@ -3618,8 +3623,8 @@ strength_reduce (scan_start, end, loop_top, insn_count,
|
||||
/* Past CODE_LABEL, we get to insns that may be executed multiple
|
||||
times. The only way we can be sure that they can't is if every
|
||||
jump insn between here and the end of the loop either
|
||||
returns, exits the loop, is a forward jump, or is a jump
|
||||
to the loop start. */
|
||||
returns, exits the loop, is a jump to a location that is still
|
||||
behind the label, or is a jump to the loop start. */
|
||||
|
||||
if (GET_CODE (p) == CODE_LABEL)
|
||||
{
|
||||
@ -3648,9 +3653,12 @@ strength_reduce (scan_start, end, loop_top, insn_count,
|
||||
|| (JUMP_LABEL (insn) != 0
|
||||
&& JUMP_LABEL (insn) != scan_start
|
||||
&& (INSN_UID (JUMP_LABEL (insn)) >= max_uid_for_loop
|
||||
|| INSN_UID (insn) >= max_uid_for_loop
|
||||
|| (INSN_LUID (JUMP_LABEL (insn))
|
||||
< INSN_LUID (insn))))))
|
||||
|| (INSN_UID (p) < max_uid_for_loop
|
||||
? (INSN_LUID (JUMP_LABEL (insn))
|
||||
<= INSN_LUID (p))
|
||||
: (INSN_UID (insn) >= max_uid_for_loop
|
||||
|| (INSN_LUID (JUMP_LABEL (insn))
|
||||
< INSN_LUID (insn))))))))
|
||||
{
|
||||
maybe_multiple = 1;
|
||||
break;
|
||||
|
@ -1196,7 +1196,7 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
|
||||
PATTERN (insn) = remap_split_bivs (PATTERN (insn));
|
||||
}
|
||||
|
||||
/* For unroll_number - 1 times, make a copy of each instruction
|
||||
/* For unroll_number times, make a copy of each instruction
|
||||
between copy_start and copy_end, and insert these new instructions
|
||||
before the end of the loop. */
|
||||
|
||||
@ -1295,7 +1295,10 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
|
||||
/* ??? If the loop is known to be executed very many times, or the machine
|
||||
has a very cheap divide instruction, then preconditioning is a win even
|
||||
when the increment is not a power of 2. Use RTX_COST to compute
|
||||
whether divide is cheap. */
|
||||
whether divide is cheap.
|
||||
??? A divide by constant doesn't actually need a divide; look at
|
||||
expand_divmod. The reduced cost of this optimized modulo is not
|
||||
reflected in RTX_COST. */
|
||||
|
||||
int
|
||||
precondition_loop_p (loop_start, loop_info,
|
||||
@ -2313,7 +2316,7 @@ biv_total_increment (bl, loop_start, loop_end)
|
||||
for (v = bl->biv; v; v = v->next_iv)
|
||||
{
|
||||
if (v->always_computable && v->mult_val == const1_rtx
|
||||
&& ! back_branch_in_range_p (v->insn, loop_start, loop_end))
|
||||
&& ! v->maybe_multiple)
|
||||
result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
|
||||
else
|
||||
return 0;
|
||||
|
Loading…
Reference in New Issue
Block a user