mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 20:51:45 +08:00
Add zero-cost loops for xtensa port.
2014-10-10 Felix Yang <felix.yang@huawei.com> * config/xtensa/xtensa.h (TARGET_LOOPS): New Macro. * config/xtensa/xtensa.c: Include dumpfile.h and hw-doloop.h. (xtensa_reorg, xtensa_reorg_loops): New. (xtensa_can_use_doloop_p, xtensa_invalid_within_doloop): New. (hwloop_optimize, hwloop_fail, hwloop_pattern_reg): New. (xtensa_emit_loop_end): Emit the zero-overhead loop end label. (xtensa_doloop_hooks): Define. * config/xtensa/xtensa.md (doloop_end, loop_end): New. (zero_cost_loop_start): Rewritten. (zero_cost_loop_end): Likewise. From-SVN: r216945
This commit is contained in:
parent
77893d0b75
commit
6383386a1b
@ -1,3 +1,16 @@
|
||||
2014-10-10 Felix Yang <felix.yang@huawei.com>
|
||||
|
||||
* config/xtensa/xtensa.h (TARGET_LOOPS): New Macro.
|
||||
* config/xtensa/xtensa.c: Include dumpfile.h and hw-doloop.h.
|
||||
(xtensa_reorg, xtensa_reorg_loops): New.
|
||||
(xtensa_can_use_doloop_p, xtensa_invalid_within_doloop): New.
|
||||
(hwloop_optimize, hwloop_fail, hwloop_pattern_reg): New.
|
||||
(xtensa_emit_loop_end): Emit the zero-overhead loop end label.
|
||||
(xtensa_doloop_hooks): Define.
|
||||
* config/xtensa/xtensa.md (doloop_end, loop_end): New.
|
||||
(zero_cost_loop_start): Rewritten.
|
||||
(zero_cost_loop_end): Likewise.
|
||||
|
||||
2014-10-30 Steve Ellcey <sellcey@imgtec.com>
|
||||
|
||||
* config.gcc (mips*-*-linux*): Combine 32 and 64 bit cases.
|
||||
|
@ -74,6 +74,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "gimplify.h"
|
||||
#include "df.h"
|
||||
#include "builtins.h"
|
||||
#include "dumpfile.h"
|
||||
#include "hw-doloop.h"
|
||||
#include "rtl-iter.h"
|
||||
|
||||
|
||||
@ -200,6 +202,10 @@ static reg_class_t xtensa_secondary_reload (bool, rtx, reg_class_t,
|
||||
|
||||
static bool constantpool_address_p (const_rtx addr);
|
||||
static bool xtensa_legitimate_constant_p (machine_mode, rtx);
|
||||
static void xtensa_reorg (void);
|
||||
static bool xtensa_can_use_doloop_p (const widest_int &, const widest_int &,
|
||||
unsigned int, bool);
|
||||
static const char *xtensa_invalid_within_doloop (const rtx_insn *);
|
||||
|
||||
static bool xtensa_member_type_forces_blk (const_tree,
|
||||
machine_mode mode);
|
||||
@ -326,6 +332,15 @@ static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] =
|
||||
#undef TARGET_LEGITIMATE_CONSTANT_P
|
||||
#define TARGET_LEGITIMATE_CONSTANT_P xtensa_legitimate_constant_p
|
||||
|
||||
#undef TARGET_MACHINE_DEPENDENT_REORG
|
||||
#define TARGET_MACHINE_DEPENDENT_REORG xtensa_reorg
|
||||
|
||||
#undef TARGET_CAN_USE_DOLOOP_P
|
||||
#define TARGET_CAN_USE_DOLOOP_P xtensa_can_use_doloop_p
|
||||
|
||||
#undef TARGET_INVALID_WITHIN_DOLOOP
|
||||
#define TARGET_INVALID_WITHIN_DOLOOP xtensa_invalid_within_doloop
|
||||
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
|
||||
@ -1690,7 +1705,7 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operands)
|
||||
}
|
||||
}
|
||||
|
||||
output_asm_insn ("# loop end for %0", operands);
|
||||
output_asm_insn ("%1_LEND:", operands);
|
||||
}
|
||||
|
||||
|
||||
@ -3720,4 +3735,236 @@ xtensa_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
|
||||
return !xtensa_tls_referenced_p (x);
|
||||
}
|
||||
|
||||
/* Implement TARGET_CAN_USE_DOLOOP_P. */
|
||||
|
||||
static bool
|
||||
xtensa_can_use_doloop_p (const widest_int &, const widest_int &,
|
||||
unsigned int loop_depth, bool entered_at_top)
|
||||
{
|
||||
/* Considering limitations in the hardware, only use doloop
|
||||
for innermost loops which must be entered from the top. */
|
||||
if (loop_depth > 1 || !entered_at_top)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* NULL if INSN insn is valid within a low-overhead loop.
|
||||
Otherwise return why doloop cannot be applied. */
|
||||
|
||||
static const char *
|
||||
xtensa_invalid_within_doloop (const rtx_insn *insn)
|
||||
{
|
||||
if (CALL_P (insn))
|
||||
return "Function call in the loop.";
|
||||
|
||||
if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return)
|
||||
return "Return from a call instruction in the loop.";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Optimize LOOP. */
|
||||
|
||||
static bool
|
||||
hwloop_optimize (hwloop_info loop)
|
||||
{
|
||||
int i;
|
||||
edge entry_edge;
|
||||
basic_block entry_bb;
|
||||
rtx iter_reg;
|
||||
rtx_insn *insn, *seq, *entry_after;
|
||||
|
||||
if (loop->depth > 1)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; loop %d is not innermost\n",
|
||||
loop->loop_no);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!loop->incoming_dest)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; loop %d has more than one entry\n",
|
||||
loop->loop_no);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (loop->incoming_dest != loop->head)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; loop %d is not entered from head\n",
|
||||
loop->loop_no);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (loop->has_call || loop->has_asm)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; loop %d has invalid insn\n",
|
||||
loop->loop_no);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Scan all the blocks to make sure they don't use iter_reg. */
|
||||
if (loop->iter_reg_used || loop->iter_reg_used_outside)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; loop %d uses iterator\n",
|
||||
loop->loop_no);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Check if start_label appears before doloop_end. */
|
||||
insn = loop->start_label;
|
||||
while (insn && insn != loop->loop_end)
|
||||
insn = NEXT_INSN (insn);
|
||||
|
||||
if (!insn)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
|
||||
loop->loop_no);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Get the loop iteration register. */
|
||||
iter_reg = loop->iter_reg;
|
||||
|
||||
gcc_assert (REG_P (iter_reg));
|
||||
|
||||
entry_edge = NULL;
|
||||
|
||||
FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
|
||||
if (entry_edge->flags & EDGE_FALLTHRU)
|
||||
break;
|
||||
|
||||
if (entry_edge == NULL)
|
||||
return false;
|
||||
|
||||
/* Place the zero_cost_loop_start instruction before the loop. */
|
||||
entry_bb = entry_edge->src;
|
||||
|
||||
start_sequence ();
|
||||
|
||||
insn = emit_insn (gen_zero_cost_loop_start (loop->iter_reg,
|
||||
loop->start_label,
|
||||
loop->iter_reg));
|
||||
|
||||
seq = get_insns ();
|
||||
|
||||
if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1)
|
||||
{
|
||||
basic_block new_bb;
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
|
||||
emit_insn_before (seq, BB_HEAD (loop->head));
|
||||
seq = emit_label_before (gen_label_rtx (), seq);
|
||||
new_bb = create_basic_block (seq, insn, entry_bb);
|
||||
FOR_EACH_EDGE (e, ei, loop->incoming)
|
||||
{
|
||||
if (!(e->flags & EDGE_FALLTHRU))
|
||||
redirect_edge_and_branch_force (e, new_bb);
|
||||
else
|
||||
redirect_edge_succ (e, new_bb);
|
||||
}
|
||||
|
||||
make_edge (new_bb, loop->head, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
entry_after = BB_END (entry_bb);
|
||||
while (DEBUG_INSN_P (entry_after)
|
||||
|| (NOTE_P (entry_after)
|
||||
&& NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK))
|
||||
entry_after = PREV_INSN (entry_after);
|
||||
|
||||
emit_insn_after (seq, entry_after);
|
||||
}
|
||||
|
||||
end_sequence ();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A callback for the hw-doloop pass. Called when a loop we have discovered
|
||||
turns out not to be optimizable; we have to split the loop_end pattern into
|
||||
a subtract and a test. */
|
||||
|
||||
static void
|
||||
hwloop_fail (hwloop_info loop)
|
||||
{
|
||||
rtx test;
|
||||
rtx_insn *insn = loop->loop_end;
|
||||
|
||||
emit_insn_before (gen_addsi3 (loop->iter_reg,
|
||||
loop->iter_reg,
|
||||
constm1_rtx),
|
||||
loop->loop_end);
|
||||
|
||||
test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
|
||||
insn = emit_jump_insn_before (gen_cbranchsi4 (test,
|
||||
loop->iter_reg, const0_rtx,
|
||||
loop->start_label),
|
||||
loop->loop_end);
|
||||
|
||||
JUMP_LABEL (insn) = loop->start_label;
|
||||
LABEL_NUSES (loop->start_label)++;
|
||||
delete_insn (loop->loop_end);
|
||||
}
|
||||
|
||||
/* A callback for the hw-doloop pass. This function examines INSN; if
|
||||
it is a doloop_end pattern we recognize, return the reg rtx for the
|
||||
loop counter. Otherwise, return NULL_RTX. */
|
||||
|
||||
static rtx
|
||||
hwloop_pattern_reg (rtx_insn *insn)
|
||||
{
|
||||
rtx reg;
|
||||
|
||||
if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
|
||||
return NULL_RTX;
|
||||
|
||||
reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1));
|
||||
if (!REG_P (reg))
|
||||
return NULL_RTX;
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
|
||||
static struct hw_doloop_hooks xtensa_doloop_hooks =
|
||||
{
|
||||
hwloop_pattern_reg,
|
||||
hwloop_optimize,
|
||||
hwloop_fail
|
||||
};
|
||||
|
||||
/* Run from machine_dependent_reorg, this pass looks for doloop_end insns
|
||||
and tries to rewrite the RTL of these loops so that proper Xtensa
|
||||
hardware loops are generated. */
|
||||
|
||||
static void
|
||||
xtensa_reorg_loops (void)
|
||||
{
|
||||
reorg_loops (false, &xtensa_doloop_hooks);
|
||||
}
|
||||
|
||||
/* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. */
|
||||
|
||||
static void
|
||||
xtensa_reorg (void)
|
||||
{
|
||||
/* We are freeing block_for_insn in the toplev to keep compatibility
|
||||
with old MDEP_REORGS that are not CFG based. Recompute it now. */
|
||||
compute_bb_for_insn ();
|
||||
|
||||
df_analyze ();
|
||||
|
||||
/* Doloop optimization. */
|
||||
xtensa_reorg_loops ();
|
||||
}
|
||||
|
||||
#include "gt-xtensa.h"
|
||||
|
@ -65,6 +65,7 @@ extern unsigned xtensa_current_frame_size;
|
||||
#define TARGET_S32C1I XCHAL_HAVE_S32C1I
|
||||
#define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS
|
||||
#define TARGET_THREADPTR XCHAL_HAVE_THREADPTR
|
||||
#define TARGET_LOOPS XCHAL_HAVE_LOOPS
|
||||
|
||||
#define TARGET_DEFAULT \
|
||||
((XCHAL_HAVE_L32R ? 0 : MASK_CONST16) | \
|
||||
|
@ -35,6 +35,8 @@
|
||||
(UNSPEC_TLS_CALL 9)
|
||||
(UNSPEC_TP 10)
|
||||
(UNSPEC_MEMW 11)
|
||||
(UNSPEC_LSETUP_START 12)
|
||||
(UNSPEC_LSETUP_END 13)
|
||||
|
||||
(UNSPECV_SET_FP 1)
|
||||
(UNSPECV_ENTRY 2)
|
||||
@ -1279,41 +1281,120 @@
|
||||
(set_attr "length" "3")])
|
||||
|
||||
|
||||
;; Zero-overhead looping support.
|
||||
|
||||
;; Define the loop insns used by bct optimization to represent the
|
||||
;; start and end of a zero-overhead loop (in loop.c). This start
|
||||
;; template generates the loop insn; the end template doesn't generate
|
||||
;; any instructions since loop end is handled in hardware.
|
||||
;; start and end of a zero-overhead loop. This start template generates
|
||||
;; the loop insn; the end template doesn't generate any instructions since
|
||||
;; loop end is handled in hardware.
|
||||
|
||||
(define_insn "zero_cost_loop_start"
|
||||
[(set (pc)
|
||||
(if_then_else (eq (match_operand:SI 0 "register_operand" "a")
|
||||
(const_int 0))
|
||||
(label_ref (match_operand 1 "" ""))
|
||||
(pc)))
|
||||
(set (reg:SI 19)
|
||||
(plus:SI (match_dup 0) (const_int -1)))]
|
||||
""
|
||||
"loopnez\t%0, %l1"
|
||||
(if_then_else (ne (match_operand:SI 0 "register_operand" "2")
|
||||
(const_int 1))
|
||||
(label_ref (match_operand 1 "" ""))
|
||||
(pc)))
|
||||
(set (match_operand:SI 2 "register_operand" "=a")
|
||||
(plus (match_dup 0)
|
||||
(const_int -1)))
|
||||
(unspec [(const_int 0)] UNSPEC_LSETUP_START)]
|
||||
"TARGET_LOOPS && optimize"
|
||||
"loop\t%0, %l1_LEND"
|
||||
[(set_attr "type" "jump")
|
||||
(set_attr "mode" "none")
|
||||
(set_attr "length" "3")])
|
||||
|
||||
(define_insn "zero_cost_loop_end"
|
||||
[(set (pc)
|
||||
(if_then_else (ne (reg:SI 19) (const_int 0))
|
||||
(label_ref (match_operand 0 "" ""))
|
||||
(pc)))
|
||||
(set (reg:SI 19)
|
||||
(plus:SI (reg:SI 19) (const_int -1)))]
|
||||
""
|
||||
(if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "2,2")
|
||||
(const_int 1))
|
||||
(label_ref (match_operand 1 "" ""))
|
||||
(pc)))
|
||||
(set (match_operand:SI 2 "nonimmediate_operand" "=a,m")
|
||||
(plus (match_dup 0)
|
||||
(const_int -1)))
|
||||
(unspec [(const_int 0)] UNSPEC_LSETUP_END)
|
||||
(clobber (match_scratch:SI 3 "=X,&r"))]
|
||||
"TARGET_LOOPS && optimize"
|
||||
"#"
|
||||
[(set_attr "type" "jump")
|
||||
(set_attr "mode" "none")
|
||||
(set_attr "length" "0")])
|
||||
|
||||
;; End of a hardware loop.  Operand 0 is the loop counter (tied to
;; operand 2, the decremented counter) and operand 1 is the loop label.
;; The insn has length 0; its text is produced by xtensa_emit_loop_end.
(define_insn "loop_end"
  [(set (pc)
        (if_then_else (ne (match_operand:SI 0 "register_operand" "2")
                          (const_int 1))
                      (label_ref (match_operand 1 "" ""))
                      (pc)))
   (set (match_operand:SI 2 "register_operand" "=a")
        (plus (match_dup 0)
              (const_int -1)))
   (unspec [(const_int 0)] UNSPEC_LSETUP_END)]
  "TARGET_LOOPS && optimize"
{
  xtensa_emit_loop_end (insn, operands);
  return "";
}
  [(set_attr "type" "jump")
   (set_attr "mode" "none")
   (set_attr "length" "0")])
|
||||
|
||||
;; Post-reload split of the four-element loop-end parallel (branch,
;; counter decrement, UNSPEC_LSETUP_END marker, scratch clobber).
;; Operand 0/2 is the loop counter, operand 1 the loop label and
;; operand 3 the scratch.  If the counter is a register the loop_end
;; insn is emitted; otherwise the counter is decremented through the
;; scratch and an ordinary conditional branch is emitted.
(define_split
|
||||
[(set (pc)
|
||||
(if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "")
|
||||
(const_int 1))
|
||||
(label_ref (match_operand 1 "" ""))
|
||||
(pc)))
|
||||
(set (match_operand:SI 2 "nonimmediate_operand" "")
|
||||
(plus:SI (match_dup 0)
|
||||
(const_int -1)))
|
||||
(unspec [(const_int 0)] UNSPEC_LSETUP_END)
|
||||
(clobber (match_scratch 3))]
|
||||
"TARGET_LOOPS && optimize && reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
if (!REG_P (operands[0]))
|
||||
{
|
||||
rtx test;
|
||||
|
||||
|
||||
/* Fall back to a normal conditional branch insn: copy the counter
   into the scratch, decrement it, store it back, then branch to the
   loop label while the decremented value is nonzero. */
emit_move_insn (operands[3], operands[0]);
|
||||
emit_insn (gen_addsi3 (operands[3], operands[3], constm1_rtx));
|
||||
emit_move_insn (operands[0], operands[3]);
|
||||
test = gen_rtx_NE (VOIDmode, operands[3], const0_rtx);
|
||||
emit_jump_insn (gen_cbranchsi4 (test, operands[3],
|
||||
const0_rtx, operands[1]));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The counter is in a register: emit the loop_end insn, which does
   not carry the scratch clobber. */
emit_jump_insn (gen_loop_end (operands[0], operands[1], operands[2]));
|
||||
}
|
||||
|
||||
DONE;
|
||||
})
|
||||
|
||||
; Expander for the doloop_end pattern: a parallel of the loop branch,
; the counter decrement, an UNSPEC_LSETUP_END marker and a scratch
; clobber.  Enabled only when TARGET_LOOPS && optimize.
; operand 0 is the loop count pseudo register
|
||||
; operand 1 is the label to jump to at the top of the loop
|
||||
; operand 2 is not supplied by the caller; the preparation code below
; sets it to an SImode scratch
(define_expand "doloop_end"
|
||||
[(parallel [(set (pc) (if_then_else
|
||||
(ne (match_operand:SI 0 "" "")
|
||||
(const_int 1))
|
||||
(label_ref (match_operand 1 "" ""))
|
||||
(pc)))
|
||||
(set (match_dup 0)
|
||||
(plus:SI (match_dup 0)
|
||||
(const_int -1)))
|
||||
(unspec [(const_int 0)] UNSPEC_LSETUP_END)
|
||||
(clobber (match_dup 2))])] ; match_scratch
|
||||
"TARGET_LOOPS && optimize"
|
||||
{
|
||||
/* The loop optimizer doesn't check the predicates, so a counter that
   is not SImode is rejected here by hand. */
if (GET_MODE (operands[0]) != SImode)
|
||||
FAIL;
|
||||
/* Create the scratch referenced by the clobber in the template. */
operands[2] = gen_rtx_SCRATCH (SImode);
|
||||
})
|
||||
|
||||
|
||||
;; Setting a register from a comparison.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user