mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-31 11:10:51 +08:00
optabs.c (expand_abs_nojump): Update BRANCH_COST call.
* optabs.c (expand_abs_nojump): Update BRANCH_COST call. * fold-const.c (LOGICAL_OP_NON_SHORT_CIRCUIT, fold_truthop): Likewise. * dojump.c (do_jump): Likewise. * ifcvt.c (MAX_CONDITIONAL_EXECUTE): Likewise. (noce_if_info): Add branch_cost field. (noce_try_store_flag_constants, noce_try_addcc, noce_try_store_flag_mask, noce_try_cmove_arith, noce_try_cmove_arith, noce_try_cmove_arith, noce_find_if_block, find_if_case_1, find_if_case_2): Use computed branch cost. * expr.h (BRANCH_COST): Update default. * predict.c (predictable_edge_p): New function. * expmed.c (expand_smod_pow2, expand_sdiv_pow2, emit_store_flag): Update BRANCH_COST call. * basic-block.h (predictable_edge_p): Declare. * config/alpha/alpha.h (BRANCH_COST): Update. * config/frv/frv.h (BRANCH_COST): Update. * config/s390/s390.h (BRANCH_COST): Update. * config/spu/spu.h (BRANCH_COST): Update. * config/sparc/sparc.h (BRANCH_COST): Update. * config/m32r/m32r.h (BRANCH_COST): Update. * config/i386/i386.h (BRANCH_COST): Update. * config/i386/i386.c (ix86_expand_int_movcc): Update use of BRANCH_COST. * config/sh/sh.h (BRANCH_COST): Update. * config/pdp11/pdp11.h (BRANCH_COST): Update. * config/avr/avr.h (BRANCH_COST): Update. * config/crx/crx.h (BRANCH_COST): Update. * config/xtensa/xtensa.h (BRANCH_COST): Update. * config/stormy16/stormy16.h (BRANCH_COST): Update. * config/m68hc11/m68hc11.h (BRANCH_COST): Update. * config/iq2000/iq2000.h (BRANCH_COST): Update. * config/ia64/ia64.h (BRANCH_COST): Update. * config/rs6000/rs6000.h (BRANCH_COST): Update. * config/arc/arc.h (BRANCH_COST): Update. * config/score/score.h (BRANCH_COST): Update. * config/arm/arm.h (BRANCH_COST): Update. * config/pa/pa.h (BRANCH_COST): Update. * config/mips/mips.h (BRANCH_COST): Update. * config/vax/vax.h (BRANCH_COST): Update. * config/h8300/h8300.h (BRANCH_COST): Update. * params.def (PARAM_PREDICTABLE_BRANCH_OUTCOME): New. * doc/invoke.texi (predictable-branch-outcome): Document. * doc/tm.texi (BRANCH_COST): Update. From-SVN: r139804
This commit is contained in:
parent
b1bdaf4061
commit
3a4fd356e0
@ -1,3 +1,48 @@
|
||||
2008-08-30 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* optabs.c (expand_abs_nojump): Update BRANCH_COST call.
|
||||
* fold-const.c (LOGICAL_OP_NON_SHORT_CIRCUIT, fold_truthop): Likewise.
|
||||
* dojump.c (do_jump): Likewise.
|
||||
* ifcvt.c (MAX_CONDITIONAL_EXECUTE): Likewise.
|
||||
(noce_if_info): Add branch_cost field.
|
||||
(noce_try_store_flag_constants, noce_try_addcc, noce_try_store_flag_mask,
|
||||
noce_try_cmove_arith, noce_try_cmove_arith, noce_try_cmove_arith,
|
||||
noce_find_if_block, find_if_case_1, find_if_case_2): Use computed
|
||||
branch cost.
|
||||
* expr.h (BRANCH_COST): Update default.
|
||||
* predict.c (predictable_edge_p): New function.
|
||||
* expmed.c (expand_smod_pow2, expand_sdiv_pow2, emit_store_flag):
|
||||
Update BRANCH_COST call.
|
||||
* basic-block.h (predictable_edge_p): Declare.
|
||||
* config/alpha/alpha.h (BRANCH_COST): Update.
|
||||
* config/frv/frv.h (BRANCH_COST): Update.
|
||||
* config/s390/s390.h (BRANCH_COST): Update.
|
||||
* config/spu/spu.h (BRANCH_COST): Update.
|
||||
* config/sparc/sparc.h (BRANCH_COST): Update.
|
||||
* config/m32r/m32r.h (BRANCH_COST): Update.
|
||||
* config/i386/i386.h (BRANCH_COST): Update.
|
||||
* config/i386/i386.c (ix86_expand_int_movcc): Update use of BRANCH_COST.
|
||||
* config/sh/sh.h (BRANCH_COST): Update.
|
||||
* config/pdp11/pdp11.h (BRANCH_COST): Update.
|
||||
* config/avr/avr.h (BRANCH_COST): Update.
|
||||
* config/crx/crx.h (BRANCH_COST): Update.
|
||||
* config/xtensa/xtensa.h (BRANCH_COST): Update.
|
||||
* config/stormy16/stormy16.h (BRANCH_COST): Update.
|
||||
* config/m68hc11/m68hc11.h (BRANCH_COST): Update.
|
||||
* config/iq2000/iq2000.h (BRANCH_COST): Update.
|
||||
* config/ia64/ia64.h (BRANCH_COST): Update.
|
||||
* config/rs6000/rs6000.h (BRANCH_COST): Update.
|
||||
* config/arc/arc.h (BRANCH_COST): Update.
|
||||
* config/score/score.h (BRANCH_COST): Update.
|
||||
* config/arm/arm.h (BRANCH_COST): Update.
|
||||
* config/pa/pa.h (BRANCH_COST): Update.
|
||||
* config/mips/mips.h (BRANCH_COST): Update.
|
||||
* config/vax/vax.h (BRANCH_COST): Update.
|
||||
* config/h8300/h8300.h (BRANCH_COST): Update.
|
||||
* params.def (PARAM_PREDICTABLE_BRANCH_OUTCOME): New.
|
||||
* doc/invoke.texi (predictable-branch-outcome): Document.
|
||||
* doc/tm.texi (BRANCH_COST): Update.
|
||||
|
||||
2008-08-30 Samuel Tardieu <sam@rfc1149.net>
|
||||
|
||||
PR target/37283
|
||||
|
@ -852,6 +852,7 @@ extern void guess_outgoing_edge_probabilities (basic_block);
|
||||
extern void remove_predictions_associated_with_edge (edge);
|
||||
extern bool edge_probability_reliable_p (const_edge);
|
||||
extern bool br_prob_note_reliable_p (const_rtx);
|
||||
extern bool predictable_edge_p (edge);
|
||||
|
||||
/* In cfg.c */
|
||||
extern void dump_regset (regset, FILE *);
|
||||
|
@ -640,7 +640,7 @@ extern int alpha_memory_latency;
|
||||
#define MEMORY_MOVE_COST(MODE,CLASS,IN) (2*alpha_memory_latency)
|
||||
|
||||
/* Provide the cost of a branch. Exact meaning under development. */
|
||||
#define BRANCH_COST 5
|
||||
#define BRANCH_COST(speed_p, predictable_p) 5
|
||||
|
||||
/* Stack layout; function entry, exit and calling. */
|
||||
|
||||
|
@ -824,7 +824,7 @@ arc_select_cc_mode (OP, X, Y)
|
||||
/* The cost of a branch insn. */
|
||||
/* ??? What's the right value here? Branches are certainly more
|
||||
expensive than reg->reg moves. */
|
||||
#define BRANCH_COST 2
|
||||
#define BRANCH_COST(speed_p, predictable_p) 2
|
||||
|
||||
/* Nonzero if access to memory by bytes is slow and undesirable.
|
||||
For RISC chips, it means that access to memory by bytes is no
|
||||
|
@ -2297,7 +2297,7 @@ do { \
|
||||
|
||||
/* Try to generate sequences that don't involve branches, we can then use
|
||||
conditional instructions */
|
||||
#define BRANCH_COST \
|
||||
#define BRANCH_COST(speed_p, predictable_p) \
|
||||
(TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
|
||||
|
||||
/* Position Independent Code. */
|
||||
|
@ -511,7 +511,7 @@ do { \
|
||||
(MODE)==SImode ? 8 : \
|
||||
(MODE)==SFmode ? 8 : 16)
|
||||
|
||||
#define BRANCH_COST 0
|
||||
#define BRANCH_COST(speed_p, predictable_p) 0
|
||||
|
||||
#define SLOW_BYTE_ACCESS 0
|
||||
|
||||
|
@ -420,7 +420,7 @@ struct cumulative_args
|
||||
/* Moving to processor register flushes pipeline - thus asymmetric */
|
||||
#define REGISTER_MOVE_COST(MODE, FROM, TO) ((TO != GENERAL_REGS) ? 8 : 2)
|
||||
/* Assume best case (branch predicted) */
|
||||
#define BRANCH_COST 2
|
||||
#define BRANCH_COST(speed_p, predictable_p) 2
|
||||
|
||||
#define SLOW_BYTE_ACCESS 1
|
||||
|
||||
|
@ -2193,7 +2193,7 @@ do { \
|
||||
|
||||
/* A C expression for the cost of a branch instruction. A value of 1 is the
|
||||
default; other values are interpreted relative to that. */
|
||||
#define BRANCH_COST frv_branch_cost_int
|
||||
#define BRANCH_COST(speed_p, predictable_p) frv_branch_cost_int
|
||||
|
||||
/* Define this macro as a C expression which is nonzero if accessing less than
|
||||
a word of memory (i.e. a `char' or a `short') is no faster than accessing a
|
||||
|
@ -1004,7 +1004,7 @@ struct cum_arg
|
||||
#define DELAY_SLOT_LENGTH(JUMP) \
|
||||
(NEXT_INSN (PREV_INSN (JUMP)) == JUMP ? 0 : 2)
|
||||
|
||||
#define BRANCH_COST 0
|
||||
#define BRANCH_COST(speed_p, predictable_p) 0
|
||||
|
||||
/* Tell final.c how to eliminate redundant test instructions. */
|
||||
|
||||
|
@ -14636,7 +14636,8 @@ ix86_expand_int_movcc (rtx operands[])
|
||||
*/
|
||||
|
||||
if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
|
||||
&& BRANCH_COST >= 2)
|
||||
&& BRANCH_COST (optimize_insn_for_speed_p (),
|
||||
false) >= 2)
|
||||
{
|
||||
if (cf == 0)
|
||||
{
|
||||
@ -14721,7 +14722,7 @@ ix86_expand_int_movcc (rtx operands[])
|
||||
optab op;
|
||||
rtx var, orig_out, out, tmp;
|
||||
|
||||
if (BRANCH_COST <= 2)
|
||||
if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
|
||||
return 0; /* FAIL */
|
||||
|
||||
/* If one of the two operands is an interesting constant, load a
|
||||
|
@ -1975,7 +1975,8 @@ do { \
|
||||
/* A C expression for the cost of a branch instruction. A value of 1
|
||||
is the default; other values are interpreted relative to that. */
|
||||
|
||||
#define BRANCH_COST ix86_branch_cost
|
||||
#define BRANCH_COST(speed_p, predictable_p) \
|
||||
(!(speed_p) ? 2 : (predictable_p) ? 0 : ix86_branch_cost)
|
||||
|
||||
/* Define this macro as a C expression which is nonzero if accessing
|
||||
less than a word of memory (i.e. a `char' or a `short') is no
|
||||
|
@ -1384,7 +1384,7 @@ do { \
|
||||
many additional insn groups we run into, vs how good the dynamic
|
||||
branch predictor is. */
|
||||
|
||||
#define BRANCH_COST 6
|
||||
#define BRANCH_COST(speed_p, predictable_p) 6
|
||||
|
||||
/* Define this macro as a C expression which is nonzero if accessing less than
|
||||
a word of memory (i.e. a `char' or a `short') is no faster than accessing a
|
||||
|
@ -624,7 +624,7 @@ typedef struct iq2000_args
|
||||
#define MEMORY_MOVE_COST(MODE,CLASS,TO_P) \
|
||||
(TO_P ? 2 : 16)
|
||||
|
||||
#define BRANCH_COST 2
|
||||
#define BRANCH_COST(speed_p, predictable_p) 2
|
||||
|
||||
#define SLOW_BYTE_ACCESS 1
|
||||
|
||||
|
@ -1224,7 +1224,7 @@ L2: .word STATIC
|
||||
/* A value of 2 here causes GCC to avoid using branches in comparisons like
|
||||
while (a < N && a). Branches aren't that expensive on the M32R so
|
||||
we define this as 1. Defining it as 2 had a heavy hit in fp-bit.c. */
|
||||
#define BRANCH_COST ((TARGET_BRANCH_COST) ? 2 : 1)
|
||||
#define BRANCH_COST(speed_p, predictable_p) ((TARGET_BRANCH_COST) ? 2 : 1)
|
||||
|
||||
/* Nonzero if access to memory by bytes is slow and undesirable.
|
||||
For RISC chips, it means that access to memory by bytes is no
|
||||
|
@ -1266,7 +1266,7 @@ extern unsigned char m68hc11_reg_valid_for_index[FIRST_PSEUDO_REGISTER];
|
||||
|
||||
Pretend branches are cheap because GCC generates sub-optimal code
|
||||
for the default value. */
|
||||
#define BRANCH_COST 0
|
||||
#define BRANCH_COST(speed_p, predictable_p) 0
|
||||
|
||||
/* Nonzero if access to memory by bytes is slow and undesirable. */
|
||||
#define SLOW_BYTE_ACCESS 0
|
||||
|
@ -2557,7 +2557,7 @@ typedef struct mips_args {
|
||||
/* A C expression for the cost of a branch instruction. A value of
|
||||
1 is the default; other values are interpreted relative to that. */
|
||||
|
||||
#define BRANCH_COST mips_branch_cost
|
||||
#define BRANCH_COST(speed_p, predictable_p) mips_branch_cost
|
||||
#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
|
||||
|
||||
/* If defined, modifies the length assigned to instruction INSN as a
|
||||
|
@ -1570,7 +1570,7 @@ do { \
|
||||
: 2)
|
||||
|
||||
/* Adjust the cost of branches. */
|
||||
#define BRANCH_COST (pa_cpu == PROCESSOR_8000 ? 2 : 1)
|
||||
#define BRANCH_COST(speed_p, predictable_p) (pa_cpu == PROCESSOR_8000 ? 2 : 1)
|
||||
|
||||
/* Handling the special cases is going to get too complicated for a macro,
|
||||
just call `pa_adjust_insn_length' to do the real work. */
|
||||
|
@ -1057,7 +1057,7 @@ JMP FUNCTION 0x0058 0x0000 <- FUNCTION
|
||||
/* there is no point in avoiding branches on a pdp,
|
||||
since branches are really cheap - I just want to find out
|
||||
how much difference the BRANCH_COST macro makes in code */
|
||||
#define BRANCH_COST (TARGET_BRANCH_CHEAP ? 0 : 1)
|
||||
#define BRANCH_COST(speed_p, predictable_p) (TARGET_BRANCH_CHEAP ? 0 : 1)
|
||||
|
||||
|
||||
#define COMPARE_FLAG_MODE HImode
|
||||
|
@ -967,7 +967,7 @@ extern enum rs6000_nop_insertion rs6000_sched_insert_nops;
|
||||
Set this to 3 on the RS/6000 since that is roughly the average cost of an
|
||||
unscheduled conditional branch. */
|
||||
|
||||
#define BRANCH_COST 3
|
||||
#define BRANCH_COST(speed_p, predictable_p) 3
|
||||
|
||||
/* Override BRANCH_COST heuristic which empirically produces worse
|
||||
performance for removing short circuiting from the logical ops. */
|
||||
|
@ -828,7 +828,7 @@ extern struct rtx_def *s390_compare_op0, *s390_compare_op1, *s390_compare_emitte
|
||||
|
||||
/* A C expression for the cost of a branch instruction. A value of 1
|
||||
is the default; other values are interpreted relative to that. */
|
||||
#define BRANCH_COST 1
|
||||
#define BRANCH_COST(speed_p, predictable_p) 1
|
||||
|
||||
/* Nonzero if access to memory by bytes is slow and undesirable. */
|
||||
#define SLOW_BYTE_ACCESS 1
|
||||
|
@ -793,7 +793,7 @@ typedef struct score_args
|
||||
(4 + memory_move_secondary_cost ((MODE), (CLASS), (TO_P)))
|
||||
|
||||
/* Try to generate sequences that don't involve branches. */
|
||||
#define BRANCH_COST 2
|
||||
#define BRANCH_COST(speed_p, predictable_p) 2
|
||||
|
||||
/* Nonzero if access to memory by bytes is slow and undesirable. */
|
||||
#define SLOW_BYTE_ACCESS 1
|
||||
|
@ -2847,7 +2847,8 @@ struct sh_args {
|
||||
The SH1 does not have delay slots, hence we get a pipeline stall
|
||||
at every branch. The SH4 is superscalar, so the single delay slot
|
||||
is not sufficient to keep both pipelines filled. */
|
||||
#define BRANCH_COST (TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1)
|
||||
#define BRANCH_COST(speed_p, predictable_p) \
|
||||
(TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1)
|
||||
|
||||
/* Assembler output control. */
|
||||
|
||||
|
@ -2196,7 +2196,7 @@ do { \
|
||||
On Niagara-2, a not-taken branch costs 1 cycle whereas a taken
|
||||
branch costs 6 cycles. */
|
||||
|
||||
#define BRANCH_COST \
|
||||
#define BRANCH_COST(speed_p, predictable_p) \
|
||||
((sparc_cpu == PROCESSOR_V9 \
|
||||
|| sparc_cpu == PROCESSOR_ULTRASPARC) \
|
||||
? 7 \
|
||||
|
@ -434,7 +434,7 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
|
||||
|
||||
/* Costs */
|
||||
|
||||
#define BRANCH_COST spu_branch_cost
|
||||
#define BRANCH_COST(speed_p, predictable_p) spu_branch_cost
|
||||
|
||||
#define SLOW_BYTE_ACCESS 0
|
||||
|
||||
|
@ -587,7 +587,7 @@ do { \
|
||||
|
||||
#define MEMORY_MOVE_COST(M,C,I) (5 + memory_move_secondary_cost (M, C, I))
|
||||
|
||||
#define BRANCH_COST 5
|
||||
#define BRANCH_COST(speed_p, predictable_p) 5
|
||||
|
||||
#define SLOW_BYTE_ACCESS 0
|
||||
|
||||
|
@ -648,7 +648,7 @@ enum reg_class { NO_REGS, ALL_REGS, LIM_REG_CLASSES };
|
||||
Branches are extremely cheap on the VAX while the shift insns often
|
||||
used to replace branches can be expensive. */
|
||||
|
||||
#define BRANCH_COST 0
|
||||
#define BRANCH_COST(speed_p, predictable_p) 0
|
||||
|
||||
/* Tell final.c how to eliminate redundant test instructions. */
|
||||
|
||||
|
@ -887,7 +887,7 @@ typedef struct xtensa_args
|
||||
|
||||
#define MEMORY_MOVE_COST(MODE, CLASS, IN) 4
|
||||
|
||||
#define BRANCH_COST 3
|
||||
#define BRANCH_COST(speed_p, predictable_p) 3
|
||||
|
||||
/* How to refer to registers in assembler output.
|
||||
This sequence is indexed by compiler's hard-register-number (see above). */
|
||||
|
@ -6900,6 +6900,10 @@ to the hottest structure frequency in the program is less than this
|
||||
parameter, then structure reorganization is not applied to this structure.
|
||||
The default is 10.
|
||||
|
||||
@item predictable-branch-outcome
|
||||
When branch is predicted to be taken with probability lower than this threshold
|
||||
(in percent), then it is considered well predictable. The default is 2.
|
||||
|
||||
@item max-crossjump-edges
|
||||
The maximum number of incoming edges to consider for crossjumping.
|
||||
The algorithm used by @option{-fcrossjumping} is @math{O(N^2)} in
|
||||
|
@ -5874,9 +5874,14 @@ value to the result of that function. The arguments to that function
|
||||
are the same as to this macro.
|
||||
@end defmac
|
||||
|
||||
@defmac BRANCH_COST
|
||||
A C expression for the cost of a branch instruction. A value of 1 is
|
||||
the default; other values are interpreted relative to that.
|
||||
@defmac BRANCH_COST (@var{speed_p}, @var{predictable_p})
|
||||
A C expression for the cost of a branch instruction. A value of 1 is the
|
||||
default; other values are interpreted relative to that. Parameter @var{speed_p}
|
||||
is true when the branch in question should be optimized for speed. When
|
||||
it is false, @code{BRANCH_COST} should return a value optimal for code size
|
||||
rather than performance considerations. @var{predictable_p} is true for well
|
||||
predictable branches. On many architectures the @code{BRANCH_COST} can be
|
||||
reduced for such branches.
|
||||
@end defmac
|
||||
|
||||
Here are additional macros which do not specify precise relative costs,
|
||||
|
@ -510,7 +510,9 @@ do_jump (tree exp, rtx if_false_label, rtx if_true_label)
|
||||
/* High branch cost, expand as the bitwise AND of the conditions.
|
||||
Do the same if the RHS has side effects, because we're effectively
|
||||
turning a TRUTH_AND_EXPR into a TRUTH_ANDIF_EXPR. */
|
||||
if (BRANCH_COST >= 4 || TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
|
||||
if (BRANCH_COST (optimize_insn_for_speed_p (),
|
||||
false) >= 4
|
||||
|| TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
|
||||
goto normal;
|
||||
|
||||
case TRUTH_ANDIF_EXPR:
|
||||
@ -531,7 +533,8 @@ do_jump (tree exp, rtx if_false_label, rtx if_true_label)
|
||||
/* High branch cost, expand as the bitwise OR of the conditions.
|
||||
Do the same if the RHS has side effects, because we're effectively
|
||||
turning a TRUTH_OR_EXPR into a TRUTH_ORIF_EXPR. */
|
||||
if (BRANCH_COST >= 4 || TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
|
||||
if (BRANCH_COST (optimize_insn_for_speed_p (), false)>= 4
|
||||
|| TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
|
||||
goto normal;
|
||||
|
||||
case TRUTH_ORIF_EXPR:
|
||||
|
26
gcc/expmed.c
26
gcc/expmed.c
@ -3492,7 +3492,7 @@ expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
|
||||
result = gen_reg_rtx (mode);
|
||||
|
||||
/* Avoid conditional branches when they're expensive. */
|
||||
if (BRANCH_COST >= 2
|
||||
if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
|
||||
&& optimize_insn_for_speed_p ())
|
||||
{
|
||||
rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
|
||||
@ -3592,7 +3592,9 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
|
||||
logd = floor_log2 (d);
|
||||
shift = build_int_cst (NULL_TREE, logd);
|
||||
|
||||
if (d == 2 && BRANCH_COST >= 1)
|
||||
if (d == 2
|
||||
&& BRANCH_COST (optimize_insn_for_speed_p (),
|
||||
false) >= 1)
|
||||
{
|
||||
temp = gen_reg_rtx (mode);
|
||||
temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
|
||||
@ -3602,7 +3604,8 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
|
||||
}
|
||||
|
||||
#ifdef HAVE_conditional_move
|
||||
if (BRANCH_COST >= 2)
|
||||
if (BRANCH_COST (optimize_insn_for_speed_p (), false)
|
||||
>= 2)
|
||||
{
|
||||
rtx temp2;
|
||||
|
||||
@ -3631,7 +3634,8 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (BRANCH_COST >= 2)
|
||||
if (BRANCH_COST (optimize_insn_for_speed_p (),
|
||||
false) >= 2)
|
||||
{
|
||||
int ushift = GET_MODE_BITSIZE (mode) - logd;
|
||||
|
||||
@ -5345,7 +5349,8 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
|
||||
comparison with zero. Don't do any of these cases if branches are
|
||||
very cheap. */
|
||||
|
||||
if (BRANCH_COST > 0
|
||||
if (BRANCH_COST (optimize_insn_for_speed_p (),
|
||||
false) > 0
|
||||
&& GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
|
||||
&& op1 != const0_rtx)
|
||||
{
|
||||
@ -5368,10 +5373,12 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
|
||||
do LE and GT if branches are expensive since they are expensive on
|
||||
2-operand machines. */
|
||||
|
||||
if (BRANCH_COST == 0
|
||||
if (BRANCH_COST (optimize_insn_for_speed_p (),
|
||||
false) == 0
|
||||
|| GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
|
||||
|| (code != EQ && code != NE
|
||||
&& (BRANCH_COST <= 1 || (code != LE && code != GT))))
|
||||
&& (BRANCH_COST (optimize_insn_for_speed_p (),
|
||||
false) <= 1 || (code != LE && code != GT))))
|
||||
return 0;
|
||||
|
||||
/* See what we need to return. We can only return a 1, -1, or the
|
||||
@ -5467,7 +5474,10 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
|
||||
that "or", which is an extra insn, so we only handle EQ if branches
|
||||
are expensive. */
|
||||
|
||||
if (tem == 0 && (code == NE || BRANCH_COST > 1))
|
||||
if (tem == 0
|
||||
&& (code == NE
|
||||
|| BRANCH_COST (optimize_insn_for_speed_p (),
|
||||
false) > 1))
|
||||
{
|
||||
if (rtx_equal_p (subtarget, op0))
|
||||
subtarget = 0;
|
||||
|
@ -36,7 +36,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
|
||||
/* The default branch cost is 1. */
|
||||
#ifndef BRANCH_COST
|
||||
#define BRANCH_COST 1
|
||||
#define BRANCH_COST(speed_p, predictable_p) 1
|
||||
#endif
|
||||
|
||||
/* This is the 4th arg to `expand_expr'.
|
||||
|
@ -5109,7 +5109,9 @@ fold_cond_expr_with_comparison (tree type, tree arg0, tree arg1, tree arg2)
|
||||
|
||||
|
||||
#ifndef LOGICAL_OP_NON_SHORT_CIRCUIT
|
||||
#define LOGICAL_OP_NON_SHORT_CIRCUIT (BRANCH_COST >= 2)
|
||||
#define LOGICAL_OP_NON_SHORT_CIRCUIT \
|
||||
(BRANCH_COST (!cfun || optimize_function_for_speed_p (cfun), \
|
||||
false) >= 2)
|
||||
#endif
|
||||
|
||||
/* EXP is some logical combination of boolean tests. See if we can
|
||||
@ -5357,7 +5359,8 @@ fold_truthop (enum tree_code code, tree truth_type, tree lhs, tree rhs)
|
||||
that can be merged. Avoid doing this if the RHS is a floating-point
|
||||
comparison since those can trap. */
|
||||
|
||||
if (BRANCH_COST >= 2
|
||||
if (BRANCH_COST (!cfun || optimize_function_for_speed_p (cfun),
|
||||
false) >= 2
|
||||
&& ! FLOAT_TYPE_P (TREE_TYPE (rl_arg))
|
||||
&& simple_operand_p (rl_arg)
|
||||
&& simple_operand_p (rr_arg))
|
||||
|
39
gcc/ifcvt.c
39
gcc/ifcvt.c
@ -67,7 +67,9 @@
|
||||
#endif
|
||||
|
||||
#ifndef MAX_CONDITIONAL_EXECUTE
|
||||
#define MAX_CONDITIONAL_EXECUTE (BRANCH_COST + 1)
|
||||
#define MAX_CONDITIONAL_EXECUTE \
|
||||
(BRANCH_COST (optimize_function_for_speed_p (cfun), false) \
|
||||
+ 1)
|
||||
#endif
|
||||
|
||||
#define IFCVT_MULTIPLE_DUMPS 1
|
||||
@ -626,6 +628,9 @@ struct noce_if_info
|
||||
from TEST_BB. For the noce transformations, we allow the symmetric
|
||||
form as well. */
|
||||
bool then_else_reversed;
|
||||
|
||||
/* Estimated cost of the particular branch instruction. */
|
||||
int branch_cost;
|
||||
};
|
||||
|
||||
static rtx noce_emit_store_flag (struct noce_if_info *, rtx, int, int);
|
||||
@ -963,20 +968,20 @@ noce_try_store_flag_constants (struct noce_if_info *if_info)
|
||||
normalize = 0;
|
||||
else if (ifalse == 0 && exact_log2 (itrue) >= 0
|
||||
&& (STORE_FLAG_VALUE == 1
|
||||
|| BRANCH_COST >= 2))
|
||||
|| if_info->branch_cost >= 2))
|
||||
normalize = 1;
|
||||
else if (itrue == 0 && exact_log2 (ifalse) >= 0 && can_reverse
|
||||
&& (STORE_FLAG_VALUE == 1 || BRANCH_COST >= 2))
|
||||
&& (STORE_FLAG_VALUE == 1 || if_info->branch_cost >= 2))
|
||||
normalize = 1, reversep = 1;
|
||||
else if (itrue == -1
|
||||
&& (STORE_FLAG_VALUE == -1
|
||||
|| BRANCH_COST >= 2))
|
||||
|| if_info->branch_cost >= 2))
|
||||
normalize = -1;
|
||||
else if (ifalse == -1 && can_reverse
|
||||
&& (STORE_FLAG_VALUE == -1 || BRANCH_COST >= 2))
|
||||
&& (STORE_FLAG_VALUE == -1 || if_info->branch_cost >= 2))
|
||||
normalize = -1, reversep = 1;
|
||||
else if ((BRANCH_COST >= 2 && STORE_FLAG_VALUE == -1)
|
||||
|| BRANCH_COST >= 3)
|
||||
else if ((if_info->branch_cost >= 2 && STORE_FLAG_VALUE == -1)
|
||||
|| if_info->branch_cost >= 3)
|
||||
normalize = -1;
|
||||
else
|
||||
return FALSE;
|
||||
@ -1107,7 +1112,7 @@ noce_try_addcc (struct noce_if_info *if_info)
|
||||
|
||||
/* If that fails, construct conditional increment or decrement using
|
||||
setcc. */
|
||||
if (BRANCH_COST >= 2
|
||||
if (if_info->branch_cost >= 2
|
||||
&& (XEXP (if_info->a, 1) == const1_rtx
|
||||
|| XEXP (if_info->a, 1) == constm1_rtx))
|
||||
{
|
||||
@ -1158,7 +1163,7 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
|
||||
int reversep;
|
||||
|
||||
reversep = 0;
|
||||
if ((BRANCH_COST >= 2
|
||||
if ((if_info->branch_cost >= 2
|
||||
|| STORE_FLAG_VALUE == -1)
|
||||
&& ((if_info->a == const0_rtx
|
||||
&& rtx_equal_p (if_info->b, if_info->x))
|
||||
@ -1317,7 +1322,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
|
||||
/* ??? FIXME: Magic number 5. */
|
||||
if (cse_not_expected
|
||||
&& MEM_P (a) && MEM_P (b)
|
||||
&& BRANCH_COST >= 5)
|
||||
&& if_info->branch_cost >= 5)
|
||||
{
|
||||
a = XEXP (a, 0);
|
||||
b = XEXP (b, 0);
|
||||
@ -1347,7 +1352,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
|
||||
if (insn_a)
|
||||
{
|
||||
insn_cost = insn_rtx_cost (PATTERN (insn_a));
|
||||
if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (BRANCH_COST))
|
||||
if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost))
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
@ -1356,7 +1361,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
|
||||
if (insn_b)
|
||||
{
|
||||
insn_cost += insn_rtx_cost (PATTERN (insn_b));
|
||||
if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (BRANCH_COST))
|
||||
if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
@ -2831,6 +2836,8 @@ noce_find_if_block (basic_block test_bb,
|
||||
if_info.cond_earliest = cond_earliest;
|
||||
if_info.jump = jump;
|
||||
if_info.then_else_reversed = then_else_reversed;
|
||||
if_info.branch_cost = BRANCH_COST (optimize_bb_for_speed_p (test_bb),
|
||||
predictable_edge_p (then_edge));
|
||||
|
||||
/* Do the real work. */
|
||||
|
||||
@ -3597,7 +3604,9 @@ find_if_case_1 (basic_block test_bb, edge then_edge, edge else_edge)
|
||||
test_bb->index, then_bb->index);
|
||||
|
||||
/* THEN is small. */
|
||||
if (! cheap_bb_rtx_cost_p (then_bb, COSTS_N_INSNS (BRANCH_COST)))
|
||||
if (! cheap_bb_rtx_cost_p (then_bb,
|
||||
COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (then_edge->src),
|
||||
predictable_edge_p (then_edge)))))
|
||||
return FALSE;
|
||||
|
||||
/* Registers set are dead, or are predicable. */
|
||||
@ -3711,7 +3720,9 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge)
|
||||
test_bb->index, else_bb->index);
|
||||
|
||||
/* ELSE is small. */
|
||||
if (! cheap_bb_rtx_cost_p (else_bb, COSTS_N_INSNS (BRANCH_COST)))
|
||||
if (! cheap_bb_rtx_cost_p (else_bb,
|
||||
COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (else_edge->src),
|
||||
predictable_edge_p (else_edge)))))
|
||||
return FALSE;
|
||||
|
||||
/* Registers set are dead, or are predicable. */
|
||||
|
@ -3443,7 +3443,9 @@ expand_abs_nojump (enum machine_mode mode, rtx op0, rtx target,
|
||||
value of X as (((signed) x >> (W-1)) ^ x) - ((signed) x >> (W-1)),
|
||||
where W is the width of MODE. */
|
||||
|
||||
if (GET_MODE_CLASS (mode) == MODE_INT && BRANCH_COST >= 2)
|
||||
if (GET_MODE_CLASS (mode) == MODE_INT
|
||||
&& BRANCH_COST (optimize_insn_for_speed_p (),
|
||||
false) >= 2)
|
||||
{
|
||||
rtx extended = expand_shift (RSHIFT_EXPR, mode, op0,
|
||||
size_int (GET_MODE_BITSIZE (mode) - 1),
|
||||
|
@ -78,6 +78,13 @@ DEFPARAM (PARAM_STRUCT_REORG_COLD_STRUCT_RATIO,
|
||||
"The threshold ratio between current and hottest structure counts",
|
||||
10, 0, 100)
|
||||
|
||||
/* When a branch is predicted to be taken with probability lower than this
|
||||
threshold (in percent), it is considered well predictable. */
|
||||
DEFPARAM (PARAM_PREDICTABLE_BRANCH_OUTCOME,
|
||||
"predictable-branch-outcome",
|
||||
"Maximal estimated outcome of branch considered predictable",
|
||||
2, 0, 50)
|
||||
|
||||
/* The single function inlining limit. This is the maximum size
|
||||
of a function counted in internal gcc instructions (not in
|
||||
real machine instructions) that is eligible for inlining
|
||||
|
@ -318,6 +318,23 @@ optimize_loop_nest_for_size_p (struct loop *loop)
|
||||
return !optimize_loop_nest_for_speed_p (loop);
|
||||
}
|
||||
|
||||
/* Return true when edge E is likely to be well predictable by branch
|
||||
predictor. */
|
||||
|
||||
bool
|
||||
predictable_edge_p (edge e)
|
||||
{
/* When profile_status is PROFILE_ABSENT no edge is ever reported as
predictable.  */
|
||||
if (profile_status == PROFILE_ABSENT)
|
||||
return false;
/* PARAM_PREDICTABLE_BRANCH_OUTCOME is a percentage; it is scaled to the
REG_BR_PROB_BASE fixed-point range before comparing.  The edge counts as
predictable when its probability lies within that margin of either
extreme: at most the threshold, or at least REG_BR_PROB_BASE minus the
threshold.  */
|
||||
if ((e->probability
|
||||
<= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
|
||||
|| (REG_BR_PROB_BASE - e->probability
|
||||
<= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/* Set RTL expansion for BB profile. */
|
||||
|
||||
void
|
||||
|
Loading…
x
Reference in New Issue
Block a user