mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-15 11:40:57 +08:00
re PR target/51244 ([SH] Inefficient conditional branch and code around T bit)
PR target/51244 * config/sh/sh_treg_combine.cc: New SH specific RTL pass. * config.gcc (SH extra_objs): Add sh_treg_combine.o. * config/sh/t-sh (sh_treg_combine.o): New entry. * config/sh/sh.c (sh_fixed_condition_code_regs): New function that implements the target hook TARGET_FIXED_CONDITION_CODE_REGS. (register_sh_passes): New function. Register sh_treg_combine pass. (sh_option_override): Invoke it. (sh_canonicalize_comparison): Handle op0_preserve_value. * sh.md (*cbranch_t): Do not try to optimize missed test and branch opportunities. Canonicalize branch condition. (nott): Allow only if pseudos can be created for non-SH2A. PR target/51244 * gcc.dg/torture/pr51244-21.c: New. * gcc.target/sh/pr51244-20.c: New. * gcc.target/sh/pr51244-20-sh2a.c: New. From-SVN: r203492
This commit is contained in:
parent
585a0b9916
commit
5d30dc5b6d
@ -1,3 +1,18 @@
|
||||
2013-10-12 Oleg Endo <olegendo@gcc.gnu.org>
|
||||
|
||||
PR target/51244
|
||||
* config/sh/sh_treg_combine.cc: New SH specific RTL pass.
|
||||
* config.gcc (SH extra_objs): Add sh_treg_combine.o.
|
||||
* config/sh/t-sh (sh_treg_combine.o): New entry.
|
||||
* config/sh/sh.c (sh_fixed_condition_code_regs): New function that
|
||||
implements the target hook TARGET_FIXED_CONDITION_CODE_REGS.
|
||||
(register_sh_passes): New function. Register sh_treg_combine pass.
|
||||
(sh_option_override): Invoke it.
|
||||
(sh_canonicalize_comparison): Handle op0_preserve_value.
|
||||
* sh.md (*cbranch_t): Do not try to optimize missed test and branch
|
||||
opportunities. Canonicalize branch condition.
|
||||
(nott): Allow only if pseudos can be created for non-SH2A.
|
||||
|
||||
2013-10-12 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/58690
|
||||
|
@ -465,6 +465,7 @@ sh[123456789lbe]*-*-* | sh-*-*)
|
||||
cpu_type=sh
|
||||
need_64bit_hwint=yes
|
||||
extra_options="${extra_options} fused-madd.opt"
|
||||
extra_objs="${extra_objs} sh_treg_combine.o"
|
||||
;;
|
||||
v850*-*-*)
|
||||
cpu_type=v850
|
||||
|
@ -53,6 +53,9 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "alloc-pool.h"
|
||||
#include "tm-constrs.h"
|
||||
#include "opts.h"
|
||||
#include "tree-pass.h"
|
||||
#include "pass_manager.h"
|
||||
#include "context.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
@ -311,6 +314,7 @@ static bool sequence_insn_p (rtx);
|
||||
static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
|
||||
static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
|
||||
enum machine_mode, bool);
|
||||
static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
|
||||
|
||||
static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
|
||||
|
||||
@ -587,6 +591,9 @@ static const struct attribute_spec sh_attribute_table[] =
|
||||
#undef TARGET_CANONICALIZE_COMPARISON
|
||||
#define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
|
||||
|
||||
#undef TARGET_FIXED_CONDITION_CODE_REGS
|
||||
#define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
|
||||
|
||||
/* Machine-specific symbol_ref flags. */
|
||||
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
|
||||
|
||||
@ -710,6 +717,34 @@ got_mode_name:;
|
||||
#undef err_ret
|
||||
}
|
||||
|
||||
/* Register SH specific RTL passes. */
|
||||
extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
|
||||
const char* name);
|
||||
/* Insert instances of the SH specific sh_treg_combine RTL pass into the
   pass pipeline.  Nothing is registered unless TARGET_SH1 is set.  Two
   instances are registered: "sh_treg_combine2" after "split1" and
   "sh_treg_combine3" after "split4".  A further instance after "ce1" is
   kept only as commented-out code, see below.  */
static void
|
||||
register_sh_passes (void)
|
||||
{
|
||||
if (!TARGET_SH1)
|
||||
return;
|
||||
|
||||
/* Running the sh_treg_combine pass after ce1 generates better code when
|
||||
comparisons are combined and reg-reg moves are introduced, because
|
||||
reg-reg moves will be eliminated afterwards. However, there are quite
|
||||
some cases where combine will be unable to fold comparison related insns,
|
||||
thus for now don't do it.
|
||||
register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
|
||||
PASS_POS_INSERT_AFTER, "ce1", 1);
|
||||
*/
|
||||
|
||||
/* Run sh_treg_combine pass after combine but before register allocation. */
|
||||
register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
|
||||
PASS_POS_INSERT_AFTER, "split1", 1);
|
||||
|
||||
/* Run sh_treg_combine pass after register allocation and basic block
|
||||
reordering as this sometimes creates new opportunities. */
|
||||
register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
|
||||
PASS_POS_INSERT_AFTER, "split4", 1);
|
||||
}
|
||||
|
||||
/* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
|
||||
various options, and do some machine dependent initialization. */
|
||||
static void
|
||||
@ -1022,6 +1057,8 @@ sh_option_override (void)
|
||||
target CPU. */
|
||||
selected_atomic_model_
|
||||
= parse_validate_atomic_model_option (sh_atomic_model_str);
|
||||
|
||||
register_sh_passes ();
|
||||
}
|
||||
|
||||
/* Print the operand address in x to the stream. */
|
||||
@ -1908,7 +1945,7 @@ prepare_move_operands (rtx operands[], enum machine_mode mode)
|
||||
static void
|
||||
sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
|
||||
enum machine_mode mode,
|
||||
bool op0_preserve_value ATTRIBUTE_UNUSED)
|
||||
bool op0_preserve_value)
|
||||
{
|
||||
/* When invoked from within the combine pass the mode is not specified,
|
||||
so try to get it from one of the operands. */
|
||||
@ -1928,6 +1965,9 @@ sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
|
||||
// Make sure that the constant operand is the second operand.
|
||||
if (CONST_INT_P (op0) && !CONST_INT_P (op1))
|
||||
{
|
||||
if (op0_preserve_value)
|
||||
return;
|
||||
|
||||
std::swap (op0, op1);
|
||||
cmp = swap_condition (cmp);
|
||||
}
|
||||
@ -2016,6 +2056,14 @@ sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
|
||||
*code = (int)tmp_code;
|
||||
}
|
||||
|
||||
/* Function installed as the TARGET_FIXED_CONDITION_CODE_REGS target hook.
   Stores T_REG in *P1 and INVALID_REGNUM in *P2 and always returns
   true.  */
bool
|
||||
sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
|
||||
{
|
||||
*p1 = T_REG;
|
||||
*p2 = INVALID_REGNUM;
|
||||
return true;
|
||||
}
|
||||
|
||||
enum rtx_code
|
||||
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
|
||||
enum rtx_code comparison)
|
||||
|
@ -8419,89 +8419,32 @@ label:
|
||||
return output_branch (sh_eval_treg_value (operands[1]), insn, operands);
|
||||
}
|
||||
"&& 1"
|
||||
[(set (pc) (if_then_else (eq (reg:SI T_REG) (match_dup 2))
|
||||
(label_ref (match_dup 0))
|
||||
(pc)))]
|
||||
[(const_int 0)]
|
||||
{
|
||||
/* Try to find missed test and branch combine opportunities which result
|
||||
in redundant T bit tests before conditional branches.
|
||||
This is done not only after combine (and before reload) but in every
|
||||
split pass, because some opportunities are formed also after combine.
|
||||
FIXME: Probably this would not be needed if CCmode was used
|
||||
together with TARGET_FIXED_CONDITION_CODE_REGS. */
|
||||
/* Try to canonicalize the branch condition if it is not one of:
|
||||
(ne (reg:SI T_REG) (const_int 0))
|
||||
(eq (reg:SI T_REG) (const_int 0))
|
||||
|
||||
const int treg_value = sh_eval_treg_value (operands[1]);
|
||||
operands[2] = NULL_RTX;
|
||||
Instead of splitting out a new insn, we modify the current insn's
|
||||
operands as needed. This preserves things such as REG_DEAD notes. */
|
||||
|
||||
/* Scan the insns backwards for an insn that sets the T bit by testing a
|
||||
reg against zero like:
|
||||
(set (reg T_REG) (eq (reg) (const_int 0))) */
|
||||
rtx testing_insn = NULL_RTX;
|
||||
rtx tested_reg = NULL_RTX;
|
||||
if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
|
||||
&& REG_P (XEXP (operands[1], 0)) && REGNO (XEXP (operands[1], 0)) == T_REG
|
||||
&& XEXP (operands[1], 1) == const0_rtx)
|
||||
DONE;
|
||||
|
||||
set_of_reg s0 = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
|
||||
prev_nonnote_insn_bb);
|
||||
if (s0.set_src != NULL_RTX
|
||||
&& GET_CODE (s0.set_src) == EQ
|
||||
&& REG_P (XEXP (s0.set_src, 0))
|
||||
&& satisfies_constraint_Z (XEXP (s0.set_src, 1)))
|
||||
{
|
||||
testing_insn = s0.insn;
|
||||
tested_reg = XEXP (s0.set_src, 0);
|
||||
}
|
||||
else
|
||||
FAIL;
|
||||
int branch_cond = sh_eval_treg_value (operands[1]);
|
||||
rtx new_cond_rtx = NULL_RTX;
|
||||
|
||||
/* Continue scanning the insns backwards and try to find the insn that
|
||||
sets the tested reg which we found above. If the reg is set by storing
|
||||
the T bit or the negated T bit we can eliminate the test insn before
|
||||
the branch. Notice that the branch condition has to be inverted if the
|
||||
test is eliminated. */
|
||||
if (branch_cond == 0)
|
||||
new_cond_rtx = gen_rtx_EQ (VOIDmode, get_t_reg_rtx (), const0_rtx);
|
||||
else if (branch_cond == 1)
|
||||
new_cond_rtx = gen_rtx_NE (VOIDmode, get_t_reg_rtx (), const0_rtx);
|
||||
|
||||
/* If the T bit is used between the testing insn and the branch insn
|
||||
leave it alone. */
|
||||
if (reg_used_between_p (get_t_reg_rtx (), testing_insn, curr_insn))
|
||||
FAIL;
|
||||
|
||||
while (true)
|
||||
{
|
||||
/* It's not safe to go beyond the current basic block after reload. */
|
||||
set_of_reg s1 = sh_find_set_of_reg (tested_reg, s0.insn,
|
||||
reload_completed
|
||||
? prev_nonnote_insn_bb
|
||||
: prev_nonnote_insn);
|
||||
if (s1.set_src == NULL_RTX)
|
||||
break;
|
||||
|
||||
if (t_reg_operand (s1.set_src, VOIDmode))
|
||||
operands[2] = GEN_INT (treg_value ^ 1);
|
||||
else if (negt_reg_operand (s1.set_src, VOIDmode))
|
||||
operands[2] = GEN_INT (treg_value);
|
||||
else if (REG_P (s1.set_src))
|
||||
{
|
||||
/* If it's a reg-reg copy follow the copied reg. This can
|
||||
happen e.g. when T bit store zero-extensions are
|
||||
eliminated. */
|
||||
tested_reg = s1.set_src;
|
||||
s0.insn = s1.insn;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* It's only safe to remove the testing insn if the T bit is not
|
||||
modified between the testing insn and the insn that stores the
|
||||
T bit. Notice that some T bit stores such as negc also modify
|
||||
the T bit. */
|
||||
if (modified_between_p (get_t_reg_rtx (), s1.insn, testing_insn)
|
||||
|| modified_in_p (get_t_reg_rtx (), s1.insn))
|
||||
operands[2] = NULL_RTX;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (operands[2] == NULL_RTX)
|
||||
FAIL;
|
||||
|
||||
set_insn_deleted (testing_insn);
|
||||
if (new_cond_rtx != NULL_RTX)
|
||||
validate_change (curr_insn, &XEXP (XEXP (PATTERN (curr_insn), 1), 0),
|
||||
new_cond_rtx, false);
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "cbranch")])
|
||||
|
||||
@ -11480,10 +11423,13 @@ label:
|
||||
;; multiple insns like:
|
||||
;; movt Rn
|
||||
;; tst Rn,Rn
|
||||
;; This requires an additional pseudo. The SH specific sh_treg_combine RTL
|
||||
;; pass will look for this insn. Disallow using it if pseudos can't be
|
||||
;; created.
|
||||
(define_insn_and_split "nott"
|
||||
[(set (reg:SI T_REG)
|
||||
(xor:SI (match_operand:SI 0 "t_reg_operand" "") (const_int 1)))]
|
||||
"TARGET_SH1"
|
||||
(xor:SI (match_operand:SI 0 "t_reg_operand") (const_int 1)))]
|
||||
"TARGET_SH2A || (TARGET_SH1 && can_create_pseudo_p ())"
|
||||
{
|
||||
gcc_assert (TARGET_SH2A);
|
||||
return "nott";
|
||||
|
1509
gcc/config/sh/sh_treg_combine.cc
Normal file
1509
gcc/config/sh/sh_treg_combine.cc
Normal file
File diff suppressed because it is too large
Load Diff
@ -21,6 +21,10 @@ sh-c.o: $(srcdir)/config/sh/sh-c.c \
|
||||
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
|
||||
$(srcdir)/config/sh/sh-c.c
|
||||
|
||||
sh_treg_combine.o: $(srcdir)/config/sh/sh_treg_combine.cc \
|
||||
$(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h
|
||||
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
|
||||
|
||||
DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG))
|
||||
OTHER_ENDIAN = $(word 2,$(TM_ENDIAN_CONFIG))
|
||||
|
||||
|
@ -1,3 +1,10 @@
|
||||
2013-10-12 Oleg Endo <olegendo@gcc.gnu.org>
|
||||
|
||||
PR target/51244
|
||||
* gcc.dg/torture/pr51244-21.c: New.
|
||||
* gcc.target/sh/pr51244-20.c: New.
|
||||
* gcc.target/sh/pr51244-20-sh2a.c: New.
|
||||
|
||||
2013-10-12 Arnaud Charlet <charlet@adacore.com>
|
||||
|
||||
* gnat.dg/specs/linker_section.ads: Update test.
|
||||
|
75
gcc/testsuite/gcc.dg/torture/pr51244-21.c
Normal file
75
gcc/testsuite/gcc.dg/torture/pr51244-21.c
Normal file
@ -0,0 +1,75 @@
|
||||
/* { dg-do run } */
|
||||
#include <assert.h>
|
||||
|
||||
/* Reads q[0], q[1] and q[2].  When q[2] is nonzero the result is
   (q[1] != 0), otherwise it is (q[0] == 0).  */
static inline int
|
||||
blk_oversized_queue (int* q)
|
||||
{
|
||||
if (q[2])
|
||||
return q[1] != 0;
|
||||
return q[0] == 0;
|
||||
}
|
||||
|
||||
/* Returns -100 when blk_oversized_queue (q) is zero.  Otherwise returns
   -33 if rw is 0 or 1, and 0 for any other rw.  Marked noinline so that
   main calls it as a real function.  */
int __attribute__ ((noinline))
|
||||
get_request (int* q, int rw)
|
||||
{
|
||||
if (blk_oversized_queue (q))
|
||||
{
|
||||
if ((rw == 1) || (rw == 0))
|
||||
return -33;
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -100;
|
||||
}
|
||||
|
||||
/* Call get_request with rw = 0, 1 and 2 for each of the four paths
   through blk_oversized_queue and check the returned code.  */
int main (void)
{
  int x[3];
  int r;

  /* q[2] != 0 and q[1] != 0: blk_oversized_queue returns 1.  */
  x[0] = 0; x[1] = 1; x[2] = 1;
  r = get_request (x, 0);
  assert (r == -33);

  r = get_request (x, 1);
  assert (r == -33);

  r = get_request (x, 2);
  assert (r == 0);

  /* q[2] != 0 and q[1] == 0: blk_oversized_queue returns 0.  */
  x[0] = 0; x[1] = 0; x[2] = 1;
  r = get_request (x, 0);
  assert (r == -100);

  r = get_request (x, 1);
  assert (r == -100);

  r = get_request (x, 2);
  assert (r == -100);

  /* q[2] == 0 and q[0] == 0: blk_oversized_queue returns 1.  */
  x[0] = 0; x[1] = 0; x[2] = 0;
  r = get_request (x, 0);
  assert (r == -33);

  r = get_request (x, 1);
  assert (r == -33);

  r = get_request (x, 2);
  assert (r == 0);

  /* q[2] == 0 and q[0] != 0: blk_oversized_queue returns 0, so every
     rw value must yield -100.  */
  x[0] = 1; x[1] = 0; x[2] = 0;
  r = get_request (x, 0);
  assert (r == -100);

  r = get_request (x, 1);
  assert (r == -100);

  r = get_request (x, 2);
  assert (r == -100);

  return 0;
}
|
14
gcc/testsuite/gcc.target/sh/pr51244-20-sh2a.c
Normal file
14
gcc/testsuite/gcc.target/sh/pr51244-20-sh2a.c
Normal file
@ -0,0 +1,14 @@
|
||||
/* Check that the SH specific sh_treg_combine RTL optimization pass works as
|
||||
expected. */
|
||||
/* { dg-do compile { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-skip-if "" { "sh*-*-*" } { "*" } { "-m2a*" } } */
|
||||
/* { dg-final { scan-assembler-times "tst" 5 } } */
|
||||
/* { dg-final { scan-assembler-times "movt" 0 } } */
|
||||
/* { dg-final { scan-assembler-times "nott" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cmp/eq" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "cmp/hi" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "cmp/gt" 3 } } */
|
||||
/* { dg-final { scan-assembler-times "not\t" 1 } } */
|
||||
|
||||
#include "pr51244-20.c"
|
103
gcc/testsuite/gcc.target/sh/pr51244-20.c
Normal file
103
gcc/testsuite/gcc.target/sh/pr51244-20.c
Normal file
@ -0,0 +1,103 @@
|
||||
/* Check that the SH specific sh_treg_combine RTL optimization pass works as
|
||||
expected. On SH2A the expected insns are slightly different, see
|
||||
pr51244-20-sh2a.c. */
|
||||
/* { dg-do compile { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" "-m2a*" } { "" } } */
|
||||
/* { dg-final { scan-assembler-times "tst" 6 } } */
|
||||
/* { dg-final { scan-assembler-times "movt" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "cmp/eq" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "cmp/hi" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "cmp/gt" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "not\t" 1 } } */
|
||||
|
||||
|
||||
/* non-SH2A: 2x tst, 1x movt, 2x cmp/eq, 1x cmp/hi
|
||||
SH2A: 1x tst, 1x nott, 2x cmp/eq, 1x cmp/hi */
|
||||
/* Equality variant: yields (q[1] == 5) when q[2] is nonzero, otherwise
   (q[0] != 5).  */
static inline int
|
||||
blk_oversized_queue_0 (int* q)
|
||||
{
|
||||
if (q[2])
|
||||
return q[1] == 5;
|
||||
return (q[0] != 5);
|
||||
}
|
||||
|
||||
/* Returns -100 when blk_oversized_queue_0 (q) is zero.  Otherwise returns
   -33 if rw is 0 or 1, and 0 for any other rw.  Marked noinline.  */
int __attribute__ ((noinline))
|
||||
get_request_0 (int* q, int rw)
|
||||
{
|
||||
if (blk_oversized_queue_0 (q))
|
||||
{
|
||||
if ((rw == 1) || (rw == 0))
|
||||
return -33;
|
||||
return 0;
|
||||
}
|
||||
return -100;
|
||||
}
|
||||
|
||||
|
||||
/* 1x tst, 1x cmp/gt, 1x cmp/hi
|
||||
On SH2A mem loads/stores have a wrong length of 4 bytes and thus will
|
||||
not be placed in a delay slot. This introduces an extra cmp/gt insn. */
|
||||
/* Same-direction signed compare variant: yields (q[1] > 5) when q[2] is
   nonzero, otherwise (q[0] > 5).  */
static inline int
|
||||
blk_oversized_queue_1 (int* q)
|
||||
{
|
||||
if (q[2])
|
||||
return q[1] > 5;
|
||||
return (q[0] > 5);
|
||||
}
|
||||
|
||||
/* Returns -100 when blk_oversized_queue_1 (q) is zero.  Otherwise returns
   -33 if rw is 0 or 1, and 0 for any other rw.  Marked noinline.  */
int __attribute__ ((noinline))
|
||||
get_request_1 (int* q, int rw)
|
||||
{
|
||||
if (blk_oversized_queue_1 (q))
|
||||
{
|
||||
if ((rw == 1) || (rw == 0))
|
||||
return -33;
|
||||
return 0;
|
||||
}
|
||||
return -100;
|
||||
}
|
||||
|
||||
|
||||
/* 1x tst, 1x cmp/gt, 1x cmp/hi, 1x cmp/hi */
|
||||
/* Opposite-direction signed compare variant: yields (q[1] > 5) when q[2]
   is nonzero, otherwise (q[0] < 5).  */
static inline int
|
||||
blk_oversized_queue_2 (int* q)
|
||||
{
|
||||
if (q[2])
|
||||
return q[1] > 5;
|
||||
return (q[0] < 5);
|
||||
}
|
||||
|
||||
/* Returns -100 when blk_oversized_queue_2 (q) is zero.  Otherwise returns
   -33 if rw is 0 or 1, and 0 for any other rw.  Marked noinline.  */
int __attribute__ ((noinline))
|
||||
get_request_2 (int* q, int rw)
|
||||
{
|
||||
if (blk_oversized_queue_2 (q))
|
||||
{
|
||||
if ((rw == 1) || (rw == 0))
|
||||
return -33;
|
||||
return 0;
|
||||
}
|
||||
return -100;
|
||||
}
|
||||
|
||||
|
||||
/* 2x tst, 1x cmp/hi, 1x not */
|
||||
/* Zero-test variant: yields (q[1] != 0) when q[2] is nonzero, otherwise
   (q[0] == 0).  */
static inline int
|
||||
blk_oversized_queue_5 (int* q)
|
||||
{
|
||||
if (q[2])
|
||||
return q[1] != 0;
|
||||
return q[0] == 0;
|
||||
}
|
||||
|
||||
/* Returns -100 when blk_oversized_queue_5 (q) is zero.  Otherwise returns
   -33 if rw is 0 or 1, and 0 for any other rw.  Marked noinline.  */
int __attribute__ ((noinline))
|
||||
get_request_5 (int* q, int rw)
|
||||
{
|
||||
if (blk_oversized_queue_5 (q))
|
||||
{
|
||||
if ((rw == 1) || (rw == 0))
|
||||
return -33;
|
||||
return 0;
|
||||
}
|
||||
return -100;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user