mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-04 08:40:33 +08:00
Enable shrink wrapping for the RISC-V target.
This commit implements the target macros (TARGET_SHRINK_WRAP_*) that enable separate shrink wrapping for function prologues/epilogues in RISC-V.
Tested against SPEC CPU 2017, this change always has a net-positive effect on the dynamic instruction count. See the following table for the breakdown of how this reduces the number of dynamic instructions per workload on a like-for-like basis (i.e., same config file; suppressing shrink-wrapping with -fno-shrink-wrap):
# dynamic instructions | w/o shrink-wrap | w/ shrink-wrap | reduction
500.perlbench_r 1265716786593 1262156218578 3560568015 0.28%
500.perlbench_r 779224795689 765337009025 13887786664 1.78%
500.perlbench_r 724087331471 711307152522 12780178949 1.77%
502.gcc_r 204259864844 194517006339 9742858505 4.77%
502.gcc_r 244047794302 231555834722 12491959580 5.12%
502.gcc_r 230896069400 221877703011 9018366389 3.91%
502.gcc_r 192130616624 183856450605 8274166019 4.31%
502.gcc_r 258875074079 247756203226 11118870853 4.30%
505.mcf_r 662653430325 660678680547 1974749778 0.30%
520.omnetpp_r 985114167068 934191310154 50922856914 5.17%
523.xalancbmk_r 927037633578 921688937650 5348695928 0.58%
525.x264_r 490953958454 490565583447 388375007 0.08%
525.x264_r 1994662294421 1993171932425 1490361996 0.07%
525.x264_r 1897617120450 1896062750609 1554369841 0.08%
531.deepsjeng_r 1695189878907 1669304130411 25885748496 1.53%
541.leela_r 1925941222222 1897900861198 28040361024 1.46%
548.exchange2_r 2073816227944 2073816226729 1215 0.00%
557.xz_r 379572090003 379057409041 514680962 0.14%
557.xz_r 953117469352 952680431430 437037922 0.05%
557.xz_r 536859579650 536456690164 402889486 0.08%
18421773405376 18223938521833 197834883543 1.07% totals
Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
gcc/ChangeLog: * config/riscv/riscv.cc (struct machine_function): Add array to store register wrapping information. (riscv_for_each_saved_reg): Skip registers that are wrapped separately. (riscv_get_separate_components): New function.
(riscv_components_for_bb): Likewise. (riscv_disqualify_components): Likewise. (riscv_process_components): Likewise. (riscv_emit_prologue_components): Likewise. (riscv_emit_epilogue_components): Likewise. (riscv_set_handled_components): Likewise. (TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS): Define. (TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB): Likewise. (TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS): Likewise. (TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS): Likewise. (TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS): Likewise. (TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Likewise. gcc/testsuite/ChangeLog: * gcc.target/riscv/shrink-wrap-1.c: New test.
This commit is contained in:
parent
06c8f2ebf0
commit
705bae2351
@ -26,6 +26,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "target.h"
|
||||
#include "backend.h"
|
||||
#include "tm.h"
|
||||
#include "rtl.h"
|
||||
#include "regs.h"
|
||||
@ -51,6 +52,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "optabs.h"
|
||||
#include "bitmap.h"
|
||||
#include "df.h"
|
||||
#include "function-abi.h"
|
||||
#include "diagnostic.h"
|
||||
#include "builtins.h"
|
||||
#include "predict.h"
|
||||
@ -154,6 +156,11 @@ struct GTY(()) machine_function {
|
||||
|
||||
/* The current frame information, calculated by riscv_compute_frame_info. */
|
||||
struct riscv_frame_info frame;
|
||||
|
||||
/* The components already handled by separate shrink-wrapping, which should
|
||||
not be considered by the prologue and epilogue. */
|
||||
bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];
|
||||
|
||||
};
|
||||
|
||||
/* Information about a single argument. */
|
||||
@ -4806,7 +4813,7 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
|
||||
for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
||||
if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
|
||||
{
|
||||
bool handle_reg = TRUE;
|
||||
bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
|
||||
|
||||
/* If this is a normal return in a function that calls the eh_return
|
||||
builtin, then do not restore the eh return data registers as that
|
||||
@ -4837,9 +4844,11 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
|
||||
for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
||||
if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
|
||||
{
|
||||
bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
|
||||
machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
|
||||
|
||||
riscv_save_restore_reg (mode, regno, offset, fn);
|
||||
if (handle_reg)
|
||||
riscv_save_restore_reg (mode, regno, offset, fn);
|
||||
offset -= GET_MODE_SIZE (mode).to_constant ();
|
||||
}
|
||||
}
|
||||
@ -5321,6 +5330,162 @@ riscv_epilogue_uses (unsigned int regno)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
|
||||
|
||||
static sbitmap
|
||||
riscv_get_separate_components (void)
|
||||
{
|
||||
HOST_WIDE_INT offset;
|
||||
sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
|
||||
bitmap_clear (components);
|
||||
|
||||
if (riscv_use_save_libcall (&cfun->machine->frame)
|
||||
|| cfun->machine->interrupt_handler_p)
|
||||
return components;
|
||||
|
||||
offset = cfun->machine->frame.gp_sp_offset.to_constant ();
|
||||
for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
||||
if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
|
||||
{
|
||||
/* We can only wrap registers that have small operand offsets.
|
||||
For large offsets a pseudo register might be needed which
|
||||
cannot be created during the shrink wrapping pass. */
|
||||
if (SMALL_OPERAND (offset))
|
||||
bitmap_set_bit (components, regno);
|
||||
|
||||
offset -= UNITS_PER_WORD;
|
||||
}
|
||||
|
||||
offset = cfun->machine->frame.fp_sp_offset.to_constant ();
|
||||
for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
||||
if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
|
||||
{
|
||||
machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
|
||||
|
||||
/* We can only wrap registers that have small operand offsets.
|
||||
For large offsets a pseudo register might be needed which
|
||||
cannot be created during the shrink wrapping pass. */
|
||||
if (SMALL_OPERAND (offset))
|
||||
bitmap_set_bit (components, regno);
|
||||
|
||||
offset -= GET_MODE_SIZE (mode).to_constant ();
|
||||
}
|
||||
|
||||
/* Don't mess with the hard frame pointer. */
|
||||
if (frame_pointer_needed)
|
||||
bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
|
||||
|
||||
bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
|
||||
|
||||
return components;
|
||||
}
|
||||
|
||||
/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
|
||||
|
||||
static sbitmap
|
||||
riscv_components_for_bb (basic_block bb)
|
||||
{
|
||||
bitmap in = DF_LIVE_IN (bb);
|
||||
bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
|
||||
bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
|
||||
|
||||
sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
|
||||
bitmap_clear (components);
|
||||
|
||||
function_abi_aggregator callee_abis;
|
||||
rtx_insn *insn;
|
||||
FOR_BB_INSNS (bb, insn)
|
||||
if (CALL_P (insn))
|
||||
callee_abis.note_callee_abi (insn_callee_abi (insn));
|
||||
HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
|
||||
|
||||
/* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
|
||||
for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
||||
if (!fixed_regs[regno]
|
||||
&& !crtl->abi->clobbers_full_reg_p (regno)
|
||||
&& (TEST_HARD_REG_BIT (extra_caller_saves, regno)
|
||||
|| bitmap_bit_p (in, regno)
|
||||
|| bitmap_bit_p (gen, regno)
|
||||
|| bitmap_bit_p (kill, regno)))
|
||||
bitmap_set_bit (components, regno);
|
||||
|
||||
for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
||||
if (!fixed_regs[regno]
|
||||
&& !crtl->abi->clobbers_full_reg_p (regno)
|
||||
&& (TEST_HARD_REG_BIT (extra_caller_saves, regno)
|
||||
|| bitmap_bit_p (in, regno)
|
||||
|| bitmap_bit_p (gen, regno)
|
||||
|| bitmap_bit_p (kill, regno)))
|
||||
bitmap_set_bit (components, regno);
|
||||
|
||||
return components;
|
||||
}
|
||||
|
||||
/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
|
||||
|
||||
static void
|
||||
riscv_disqualify_components (sbitmap, edge, sbitmap, bool)
|
||||
{
|
||||
/* Nothing to do for riscv. */
|
||||
}
|
||||
|
||||
static void
|
||||
riscv_process_components (sbitmap components, bool prologue_p)
|
||||
{
|
||||
HOST_WIDE_INT offset;
|
||||
riscv_save_restore_fn fn = prologue_p? riscv_save_reg : riscv_restore_reg;
|
||||
|
||||
offset = cfun->machine->frame.gp_sp_offset.to_constant ();
|
||||
for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
||||
if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
|
||||
{
|
||||
if (bitmap_bit_p (components, regno))
|
||||
riscv_save_restore_reg (word_mode, regno, offset, fn);
|
||||
|
||||
offset -= UNITS_PER_WORD;
|
||||
}
|
||||
|
||||
offset = cfun->machine->frame.fp_sp_offset.to_constant ();
|
||||
for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
||||
if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
|
||||
{
|
||||
machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
|
||||
|
||||
if (bitmap_bit_p (components, regno))
|
||||
riscv_save_restore_reg (mode, regno, offset, fn);
|
||||
|
||||
offset -= GET_MODE_SIZE (mode).to_constant ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
|
||||
|
||||
static void
|
||||
riscv_emit_prologue_components (sbitmap components)
|
||||
{
|
||||
riscv_process_components (components, true);
|
||||
}
|
||||
|
||||
/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
|
||||
|
||||
static void
|
||||
riscv_emit_epilogue_components (sbitmap components)
|
||||
{
|
||||
riscv_process_components (components, false);
|
||||
}
|
||||
|
||||
static void
|
||||
riscv_set_handled_components (sbitmap components)
|
||||
{
|
||||
for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
||||
if (bitmap_bit_p (components, regno))
|
||||
cfun->machine->reg_is_wrapped_separately[regno] = true;
|
||||
|
||||
for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
||||
if (bitmap_bit_p (components, regno))
|
||||
cfun->machine->reg_is_wrapped_separately[regno] = true;
|
||||
}
|
||||
|
||||
/* Return nonzero if this function is known to have a null epilogue.
|
||||
This allows the optimizer to omit jumps to jumps if no stack
|
||||
was created. */
|
||||
@ -6684,6 +6849,30 @@ riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
|
||||
#undef TARGET_FUNCTION_ARG_BOUNDARY
|
||||
#define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary
|
||||
|
||||
/* Separate shrink-wrapping hooks: each target macro is bound to the
   riscv_* function of the corresponding name.  */

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  riscv_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
  riscv_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  riscv_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  riscv_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  riscv_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  riscv_set_handled_components
|
||||
|
||||
/* The generic ELF target does not always have TLS support. */
|
||||
#ifdef HAVE_AS_TLS
|
||||
#undef TARGET_HAVE_TLS
|
||||
|
24
gcc/testsuite/gcc.target/riscv/shrink-wrap-1.c
Normal file
24
gcc/testsuite/gcc.target/riscv/shrink-wrap-1.c
Normal file
@ -0,0 +1,24 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-fshrink-wrap" } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" } } */
|
||||
|
||||
void g(void);
|
||||
|
||||
/* Shrink-wrapping test body.  All of the work -- the three explicit
   register variables, the asm statements that use them, and the call to
   g -- sits under `if (x)', so the x == 0 path has nothing to execute.
   The dg-final pattern in this file looks for a `bne a0,zero' branch with
   `ret' on the following line.  */
void f(int x)
|
||||
{
|
||||
if (x)
|
||||
{
|
||||
/* Force saving of some callee-saved registers. With shrink wrapping
|
||||
enabled these only need to be saved if x is non-zero. */
|
||||
/* NOTE(review): the asm names "18".."20" presumably select hard
   registers x18-x20 (s2-s4, matching the variable names) -- confirm.  */
register int s2 asm("18") = x;
|
||||
register int s3 asm("19") = x;
|
||||
register int s4 asm("20") = x;
|
||||
/* Empty asm statements that take each variable as an input operand.  */
asm("" : : "r"(s2));
|
||||
asm("" : : "r"(s3));
|
||||
asm("" : : "r"(s4));
|
||||
g();
|
||||
}
|
||||
}
|
||||
|
||||
/* The resulting code should do nothing if X is 0. */
|
||||
/* { dg-final { scan-assembler "bne\ta0,zero,.*\n.*ret" } } */
|
Loading…
x
Reference in New Issue
Block a user