c4x.c (c4x_parallel_process): Disable until BCT loop optimization stable for the C4x.

* config/c4x/c4x.c (c4x_parallel_process): Disable until BCT
	loop optimization stable for the C4x.
	(c4x_rptb_info_t, c4x_dump, c4x_rptb_in_range, c4x_rptb_unjumped_loop,
	c4x_rptb_find_comp_and_jump, c4x_rptb_loop_info_get,
	c4x_rptb_emit_init, c4x_rptb_process): Deleted (superceded by BCT
	loop optimization).
	(c4x_address_conflict): Be more paranoid when packing a volatile
	memref in a parallel load/store.

From-SVN: r23196
This commit is contained in:
Michael Hayes 1998-10-20 03:18:57 +00:00 committed by Michael Hayes
parent 95cb020334
commit f1c374cbf1
2 changed files with 21 additions and 554 deletions

View File

@ -1,3 +1,14 @@
Tue Oct 20 22:16:11 1998 Michael Hayes <m.hayes@elec.canterbury.ac.nz>
* config/c4x/c4x.c (c4x_parallel_process): Disable until BCT
loop optimization stable for the C4x.
(c4x_rptb_info_t, c4x_dump, c4x_rptb_in_range, c4x_rptb_unjumped_loop,
c4x_rptb_find_comp_and_jump, c4x_rptb_loop_info_get,
c4x_rptb_emit_init, c4x_rptb_process): Deleted (superceded by BCT
loop optimization).
(c4x_address_conflict): Be more paranoid when packing a volatile
memref in a parallel load/store.
Tue Oct 20 21:56:05 1998 Michael Hayes <m.hayes@elec.canterbury.ac.nz>
* config/c4x/c4x.md (repeat_block_top, repeat_block_end,

View File

@ -132,22 +132,6 @@ enum machine_mode c4x_caller_save_map[FIRST_PSEUDO_REGISTER] =
};
/* rptb_info has enough information to compute rtx for loop counter. */
typedef struct
{
int loop_count; /* Positive if loop count is constant */
/* The rest of fields are meaningless if loop_count is set */
rtx start_value; /* Starting value for biv */
rtx end_value; /* Limit for biv */
int swap_p; /* 1 for count down */
int incr; /* Increment for biv -- must be constant */
int shift; /* log2(incr) */
int off_by_one; /* 1 for "<", 0 for "<=" */
int unsigned_p; /* True if unsigned comparison at loop end */
rtx loop_start;
}
c4x_rptb_info_t;
/* Test and compare insns in c4x.md store the information needed to
generate branch and scc insns here. */
@ -167,29 +151,6 @@ tree pure_tree = NULL_TREE;
tree noreturn_tree = NULL_TREE;
tree interrupt_tree = NULL_TREE;
static void
c4x_dump (file, s)
FILE * file;
const char *s;
...
{
#ifndef __STDC__
char *s;
#endif
va_list ap;
if (!file)
return;
VA_START (ap, s);
#ifndef __STDC__
s = va_arg (ap, char *);
#endif
vfprintf (file, s, ap);
va_end (ap);
}
/* Override command line options.
Called once after all options have been parsed.
@ -3204,14 +3165,9 @@ c4x_address_conflict (op0, op1, store0, store1)
if (!TARGET_DEVEL && base0 == base1 && (incdec0 || incdec1))
return 1;
/* It is not worthwhile having parallel loads from the same address
unless we could be sure that both locations were in internal
memory. We allow this for peepholes (after reload has completed
since we are going to be executing two insns to the same address
anyhow) but steer the combiner away from doing this since it seems
to get the wrong idea. */
if (!store0 && !store1 && base0 == base1 && disp0 == disp1
&& !reload_completed)
/* We can not optimize the case where op1 and op2 refer to the same
address. */
if (base0 == base1 && disp0 == disp1 && index0 == index1)
return 1;
/* No conflict. */
@ -4623,8 +4579,14 @@ c4x_parallel_process (loop_start, loop_end)
if (!(loop_count_set = single_set (loop_count)))
return 0;
#if 0
/* Disable this optimisation until REG_LOOP_COUNT note
added. */
if (!find_reg_note (loop_count, REG_LOOP_COUNT, NULL_RTX))
return 0;
#else
return 0;
#endif
loop_count_reg = SET_DEST (loop_count_set);
@ -4912,470 +4874,6 @@ c4x_combine_parallel (insns)
}
/* True if INSN is between START and END. If END precedes START
something has gone awry. */
static int
c4x_rptb_in_range (insn, start, end)
rtx insn, start, end;
{
rtx this;
for (this = start; ; this = NEXT_INSN (this))
{
if (this == insn)
return 1;
if (this == end)
return 0;
if (this == NULL_RTX)
fatal_insn ("c4x_rptb_in_range: Repeat block error", start);
}
}
/* Returns true if there are no jumps crossing the loop boundary and
no calls anywhere. */
int
c4x_rptb_unjumped_loop_p (loop_start, loop_end)
rtx loop_start, loop_end;
{
rtx insn;
rtx continue_label = NULL_RTX;
rtx continue_note = NULL_RTX; /* Loop continue note if there is one. */
/* Scan loop backwards. */
for (insn = PREV_INSN (loop_end); insn && insn != loop_start;
insn = PREV_INSN (insn))
{
switch (GET_CODE (insn))
{
case JUMP_INSN:
{
rtx jump_label = JUMP_LABEL (insn);
/* We don't like jumps out of the loop. We also look
for jumps to the end of loop, say from a continue
statement. */
if (continue_note
&& jump_label == next_nonnote_insn (continue_note))
continue_label = jump_label;
else if (!c4x_rptb_in_range (jump_label, loop_start,
continue_note ? continue_note :
loop_end))
return 0;
}
/* Fall through */
case INSN:
if (0 && volatile_refs_p (PATTERN (insn)))
{
c4x_dump (loop_dump_stream,
"Repeat block: Volatile memory ref within loop\n");
return 0;
}
/* The C4x movstrqi_large pattern clobbers RC, RE, RS.
This should be generalised to check for insns that use
these registers within the loop. */
if (recog_memoized (insn) == CODE_FOR_movstrqi_large)
{
c4x_dump (loop_dump_stream,
"Repeat block: Memory copy within loop\n");
return 0;
}
break;
/* It is not worthwhile preserving the zero overhead loop
context across calls. */
case CALL_INSN:
/* We could allow a libcall with no side effects??? */
c4x_dump (loop_dump_stream, "Repeat block: Call within loop\n");
return 0;
case NOTE:
switch (NOTE_LINE_NUMBER (insn))
{
case NOTE_INSN_LOOP_CONT:
if (continue_note == NULL_RTX)
continue_note = insn;
/* Check for empty loop which would throw c4x_rptb_nop_p.
GCC doesn't optimise empty loops away since user
may be trying to implement a simple but crude delay. */
if (GET_CODE (PREV_INSN (insn)) == NOTE
&& NOTE_LINE_NUMBER (PREV_INSN (insn)) == NOTE_INSN_LOOP_BEG)
{
c4x_dump (loop_dump_stream, "Repeat block: Empty loop\n");
return 0;
}
break;
/* If we find a LOOP_END note, then we are not in the
innermost loop. */
case NOTE_INSN_LOOP_END:
return 0;
default:
continue;
}
default:
continue;
}
}
if (insn == NULL_RTX)
fatal("Repeat block: Inconsistent loop");
c4x_dump (loop_dump_stream, "Repeat block: Unjumped loop\n");
if (continue_label)
c4x_dump (loop_dump_stream, "Repeat block: Continue_label %d\n",
INSN_UID (continue_label));
return 1;
}
/* Find and record in PCOMP and PJUMP the final comparison and jump
insns of the loop specified by LOOP_END. Return 1 if both have been
found, otherwise return 0. */
static int
c4x_rptb_find_comp_and_jump (loop_end, pcomp, pjump)
rtx loop_end;
rtx *pcomp, *pjump;
{
rtx final_comp, comp_pat;
rtx final_jump = prev_nonnote_insn (loop_end);
if (!final_jump)
return 0;
final_comp = PREV_INSN (final_jump);
if (!final_comp)
return 0;
if ((GET_CODE (final_comp) != INSN))
return 0;
comp_pat = PATTERN (final_comp);
if ((GET_CODE (comp_pat) != SET)
|| GET_CODE (XEXP (comp_pat, 0)) != REG
|| REGNO (XEXP (comp_pat, 0)) != ST_REGNO)
return 0;
*pcomp = final_comp;
*pjump = final_jump;
return 1;
}
/* Determine if the loop count is computable for a repeat loop. */
static int
c4x_rptb_loop_info_get (loop_start, loop_end, loop_info)
rtx loop_start, loop_end;
c4x_rptb_info_t *loop_info;
{
rtx iteration_var, initial_value, increment, comparison;
enum rtx_code cc; /* Comparison code */
rtx comparison_value;
loop_info->loop_start = loop_start;
loop_info->loop_count = loop_iterations (loop_start, loop_end);
/* If the number of loop cycles does not need calculating at
run-time then things are easy... Note that the repeat count
value must be a positive integer for the RPTB instruction. If
loop_count is zero then we don't have a constant count. */
if (loop_info->loop_count > 0)
return 1;
if (loop_info->loop_count < 0)
{
c4x_dump (loop_dump_stream, "Repeat block: Negative loop count %d\n",
loop_info->loop_count);
return 0;
}
comparison = get_condition_for_loop (prev_nonnote_insn (loop_end));
if (comparison == NULL_RTX)
{
c4x_dump (loop_dump_stream, "Repeat block: Cannot find comparison\n");
return 0;
}
cc = GET_CODE (comparison);
/* Only allow a register as the iteration value. */
iteration_var = XEXP (comparison, 0);
if (GET_CODE (iteration_var) != REG)
{
c4x_dump (loop_dump_stream, "Repeat block: Non reg. iteration value\n");
return 0;
}
c4x_dump (loop_dump_stream, "Repeat block: Iteration value regno = %d\n",
REGNO (iteration_var));
/* The comparison value must not change on the fly. */
comparison_value = XEXP (comparison, 1);
if (!invariant_p (comparison_value))
{
c4x_dump (loop_dump_stream, "Repeat block: Comparison value variant\n");
return 0;
}
/* This routine in unroll.c does the hard work of finding the
initial value and increment for us. Currently it won't find the
intitial value or increment for do {} while; or while() {} do;
loops. This is because the iteration_var we find in the
comparison insn is a GIV rather than a BIV and iteration_info does
not like GIVs. We could scan all the BIVs like check_dbra_loop()
does... */
iteration_info (iteration_var, &initial_value, &increment,
loop_start, loop_end);
if (initial_value == NULL_RTX || increment == NULL_RTX)
{
c4x_dump (loop_dump_stream, "Repeat block: Cannot determine initial"
" value or increment\n");
return 0;
}
/* Only allow constant integer increment, not a variable. */
if (GET_CODE (increment) != CONST_INT)
{
c4x_dump (loop_dump_stream, "Repeat block: Increment not constant\n");
return 0;
}
loop_info->incr = INTVAL (increment);
/* If the increment is not a power of 2, (i.e, 1, 2, 4, etc.) then
we will need to emit a divide instruction rather than a right
shift to calculate the loop count. */
if ((loop_info->shift = exact_log2 (abs (loop_info->incr))) < 0)
{
c4x_dump (loop_dump_stream, "Repeat block: Increment not power of 2\n");
return 0;
}
/* The front end changes GT to NE for unsigned numbers, so we
"undo" this here for clarity. */
loop_info->unsigned_p = 0;
if (GET_CODE (increment) == CONST_INT
&& INTVAL (increment) == -1 && cc == NE)
{
loop_info->unsigned_p = 1;
cc = GT;
}
if (!(cc == LT || cc == LE || cc == LTU || cc == LEU
|| cc == GT || cc == GE || cc == GTU || cc == GEU))
{
c4x_dump (loop_dump_stream, "Repeat block: Invalid comparison\n");
return 0;
}
loop_info->swap_p = (cc == GT || cc == GE || cc == GTU || cc == GEU);
if (loop_info->swap_p)
{
loop_info->start_value = comparison_value;
loop_info->end_value = initial_value;
loop_info->incr = -loop_info->incr;
}
else
{
loop_info->start_value = initial_value;
loop_info->end_value = comparison_value;
}
/* Check if loop won't terminate? */
if (loop_info->incr <= 0)
{
c4x_dump (loop_dump_stream, "Repeat block: Increment negative\n");
return 0;
}
loop_info->off_by_one = (cc == LT || cc == LTU || cc == GT || cc == GTU);
/* We have a switch to allow an unsigned loop counter.
We'll normally disallow this case since the the repeat
count for the RPTB instruction must be less than 0x80000000. */
if (loop_info->unsigned_p && !TARGET_LOOP_UNSIGNED)
{
c4x_dump (loop_dump_stream, "Repeat block: Unsigned comparison\n");
return 0;
}
return 1;
}
/* Emit insn(s) to compute loop iteration count. */
static rtx
c4x_rptb_emit_init (loop_info)
c4x_rptb_info_t *loop_info;
{
rtx result;
int adjust;
rtx seq_start;
/* If have a known constant loop count, things are easy... */
if (loop_info->loop_count > 0)
return GEN_INT (loop_info->loop_count - 1);
if (loop_info->shift < 0)
abort ();
start_sequence ();
result = loop_info->end_value;
if (loop_info->start_value != const0_rtx)
{
/* end_value - start_value */
result = expand_binop (QImode, sub_optab,
result, loop_info->start_value,
0, loop_info->unsigned_p, OPTAB_DIRECT);
}
adjust = loop_info->incr - loop_info->off_by_one;
if (adjust > 0)
{
/* end_value - start_value + adjust */
result = expand_binop (QImode, add_optab,
result, GEN_INT (adjust),
0, loop_info->unsigned_p, OPTAB_DIRECT);
}
if (loop_info->shift > 0)
{
/* (end_value - start_value + adjust) >> shift */
result = expand_binop (QImode, loop_info->unsigned_p ?
lshr_optab : ashr_optab, result,
GEN_INT (loop_info->shift),
0, loop_info->unsigned_p, OPTAB_DIRECT);
}
/* ((end_value - start_value + adjust) >> shift) - 1 */
result = expand_binop (QImode, sub_optab,
result, GEN_INT (1),
0, loop_info->unsigned_p, OPTAB_DIRECT);
seq_start = get_insns ();
end_sequence ();
emit_insns_before (seq_start, loop_info->loop_start);
return result;
}
/* This routine checks for suitable loops that can use zero overhead
looping and emits insns marking the start and end of the loop
as well as an insn for initialising the loop counter. */
void
c4x_rptb_process (loop_start, loop_end)
rtx loop_start, loop_end;
{
rtx iteration_count;
rtx start_label;
rtx end_label;
rtx comp_insn;
rtx jump_insn;
c4x_rptb_info_t info;
if (!TARGET_RPTB)
return;
/* Check that there are no jumps crossing loop boundary or calls. */
if (!c4x_rptb_unjumped_loop_p (loop_start, loop_end))
return;
start_label = next_nonnote_insn (loop_start);
if (GET_CODE (start_label) != CODE_LABEL)
return;
/* Find comparison and jump insns. */
if (!c4x_rptb_find_comp_and_jump (loop_end, &comp_insn, &jump_insn))
return;
/* If we don't jump back to start label, then the loop is no good. */
if (start_label != JUMP_LABEL (jump_insn))
return;
/* Check that number of loops is computable. */
if (!c4x_rptb_loop_info_get (loop_start, loop_end, &info))
return;
c4x_dump (loop_dump_stream, "Repeat block: Loop start at %d, end at %d\n",
INSN_UID (loop_start), INSN_UID (loop_end));
if (info.loop_count > 0)
c4x_dump (loop_dump_stream, "Repeat block: Loop count = %d\n",
info.loop_count);
else
c4x_dump (loop_dump_stream,
"Repeat block: incr %d, shift %d, swap_p %d,"
" off_by_one %d, unsigned_p %d\n",
info.incr, info.shift, info.swap_p,
info.off_by_one, info.unsigned_p);
/* Emit insns to compute loop iteration count. */
iteration_count = c4x_rptb_emit_init (&info);
if (iteration_count == NULL_RTX)
abort ();
/* Add label at end of loop, immediately after jump insn. */
end_label = gen_label_rtx ();
emit_label_after (end_label, jump_insn);
/* Add label to forced label list to prevent jump optimisation
coalescing end_label with bypass_label since we need these destinct if
we are to sink insns out of the loop. */
if (GET_CODE (NEXT_INSN (loop_end)) == CODE_LABEL)
{
rtx bypass_label;
bypass_label = NEXT_INSN (loop_end);
#if 0
forced_labels = gen_rtx_EXPR_LIST (VOIDmode,
end_label, forced_labels);
forced_labels = gen_rtx_EXPR_LIST (VOIDmode,
bypass_label, forced_labels);
#endif
emit_insn_after (gen_repeat_block_filler (), end_label);
c4x_dump (loop_dump_stream,
"Repeat block: Start label at %d, end label at %d,"
" bypass label at %d\n",
INSN_UID (start_label), INSN_UID (end_label),
INSN_UID (bypass_label));
}
else
{
emit_insn_after (gen_repeat_block_filler (), end_label);
c4x_dump (loop_dump_stream,
"Repeat block: Start label at %d, end label at %d\n",
INSN_UID (start_label), INSN_UID (end_label));
}
/* Create pattern for repeat_block_top and insert at top of loop. */
emit_insn_before (gen_repeat_block_top (const0_rtx, iteration_count,
start_label, end_label),
start_label);
/* Replace the jump instruction with repeat_block_end insn. */
PATTERN (jump_insn) = gen_repeat_block_end (const0_rtx, start_label);
/* The insn is unrecognizable after the surgery. */
INSN_CODE (jump_insn) = -1;
/* Delete the comparison insn. */
delete_insn (comp_insn);
}
/* !!! FIXME to emit RPTS correctly. */
int
c4x_rptb_rpts_p (insn, op)
@ -5410,7 +4908,7 @@ c4x_rptb_rpts_p (insn, op)
if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
return 0;
if (recog_memoized (insn) != CODE_FOR_repeat_block_end)
if (recog_memoized (insn) != CODE_FOR_rptb_end)
return 0;
if (TARGET_RPTS)
@ -5419,48 +4917,6 @@ c4x_rptb_rpts_p (insn, op)
return (GET_CODE (op) == CONST_INT) && TARGET_RPTS_CYCLES (INTVAL (op));
}
/*
Loop structure of `for' loops:
Check if iterations required
If not, jump to BYPASS_LABEL
NOTE_INSN_LOOP_BEG
<<<Repeat block top goes here>>
START_LABEL:
{NOTE_BLOCK_BEGIN}
Body of loop
{NOTE_BLOCK_END}
{NOTE_INSN_LOOP_CONT}
Increment loop counters here
{NOTE_INSN_LOOP_VTOP}
<<<Repeat block nop goes here if nec.>>>
Exit test here <<<This gets deleted>>>
If not exiting jump to START_LABEL <<<Repeat block end goes here>>>
<<<END_LABEL goes here>>
NOTE_INSN_LOOP_END
BYPASS_LABEL:
Note that NOTE_INSN_LOOP_VTOP is only required for loops such as
for loops, where it necessary to duplicate the exit test. This
position becomes another virtual start of the loop when considering
invariants.
Note that if there is nothing in the loop body we get:
NOTE_INSN_LOOP_BEG
NOTE_INSN_LOOP_CONT
START_LABEL:
NOTE_INSN_LOOP_VTOP
...
*/
/* Adjust the cost of a scheduling dependency. Return the new cost of
a dependency LINK or INSN on DEP_INSN. COST is the current cost.