Enable shrink wrapping for the RISC-V target.

This commit implements the target macros (TARGET_SHRINK_WRAP_*) that
enable separate shrink wrapping for function prologues/epilogues in
RISC-V.

Tested against SPEC CPU 2017, this change always has a net-positive
effect on the dynamic instruction count.  See the following table for
the breakdown on how this reduces the number of dynamic instructions
per workload on a like-for-like basis (i.e., same config file, with the
baseline suppressing shrink-wrapping via -fno-shrink-wrap):

                             # dynamic instructions
                w/o shrink-wrap   w/ shrink-wrap      reduction
500.perlbench_r   1265716786593    1262156218578     3560568015   0.28%
500.perlbench_r    779224795689     765337009025    13887786664   1.78%
500.perlbench_r    724087331471     711307152522    12780178949   1.77%
502.gcc_r          204259864844     194517006339     9742858505   4.77%
502.gcc_r          244047794302     231555834722    12491959580   5.12%
502.gcc_r          230896069400     221877703011     9018366389   3.91%
502.gcc_r          192130616624     183856450605     8274166019   4.31%
502.gcc_r          258875074079     247756203226    11118870853   4.30%
505.mcf_r          662653430325     660678680547     1974749778   0.30%
520.omnetpp_r      985114167068     934191310154    50922856914   5.17%
523.xalancbmk_r    927037633578     921688937650     5348695928   0.58%
525.x264_r         490953958454     490565583447      388375007   0.08%
525.x264_r        1994662294421    1993171932425     1490361996   0.07%
525.x264_r        1897617120450    1896062750609     1554369841   0.08%
531.deepsjeng_r   1695189878907    1669304130411    25885748496   1.53%
541.leela_r       1925941222222    1897900861198    28040361024   1.46%
548.exchange2_r   2073816227944    2073816226729           1215   0.00%
557.xz_r           379572090003     379057409041      514680962   0.14%
557.xz_r           953117469352     952680431430      437037922   0.05%
557.xz_r           536859579650     536456690164      402889486   0.08%
                 18421773405376   18223938521833   197834883543   1.07%  totals

Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>

gcc/ChangeLog:

	* config/riscv/riscv.cc (struct machine_function): Add array to store
	register wrapping information.
	(riscv_for_each_saved_reg): Skip registers that are wrapped separately.
	(riscv_get_separate_components): New function.
	(riscv_components_for_bb): Likewise.
	(riscv_disqualify_components): Likewise.
	(riscv_process_components): Likewise.
	(riscv_emit_prologue_components): Likewise.
	(riscv_emit_epilogue_components): Likewise.
	(riscv_set_handled_components): Likewise.
	(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS): Define.
	(TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB): Likewise.
	(TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS): Likewise.
	(TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS): Likewise.
	(TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS): Likewise.
	(TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/shrink-wrap-1.c: New test.
This commit is contained in:
mtsamis 2022-08-24 15:22:58 +02:00 committed by Philipp Tomsich
parent 06c8f2ebf0
commit 705bae2351
2 changed files with 215 additions and 2 deletions

View File

@ -26,6 +26,7 @@ along with GCC; see the file COPYING3. If not see
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "backend.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
@ -51,6 +52,7 @@ along with GCC; see the file COPYING3. If not see
#include "optabs.h"
#include "bitmap.h"
#include "df.h"
#include "function-abi.h"
#include "diagnostic.h"
#include "builtins.h"
#include "predict.h"
@ -154,6 +156,11 @@ struct GTY(()) machine_function {
/* The current frame information, calculated by riscv_compute_frame_info. */
struct riscv_frame_info frame;
/* The components already handled by separate shrink-wrapping, which should
not be considered by the prologue and epilogue. */
bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];
};
/* Information about a single argument. */
@ -4806,7 +4813,7 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
{
bool handle_reg = TRUE;
bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
/* If this is a normal return in a function that calls the eh_return
builtin, then do not restore the eh return data registers as that
@ -4837,9 +4844,11 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
{
bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
riscv_save_restore_reg (mode, regno, offset, fn);
if (handle_reg)
riscv_save_restore_reg (mode, regno, offset, fn);
offset -= GET_MODE_SIZE (mode).to_constant ();
}
}
@ -5321,6 +5330,162 @@ riscv_epilogue_uses (unsigned int regno)
return false;
}
/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.

   Return a newly allocated sbitmap, indexed by hard register number, with
   a bit set for each saved GPR/FPR whose save and restore may be
   shrink-wrapped separately.  The returned set is empty when the function
   uses the save/restore libcalls, is an interrupt handler, or has
   register save-area offsets that are not compile-time constants.  */

static sbitmap
riscv_get_separate_components (void)
{
  HOST_WIDE_INT offset;
  sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
  bitmap_clear (components);

  if (riscv_use_save_libcall (&cfun->machine->frame)
      || cfun->machine->interrupt_handler_p)
    return components;

  /* The save-area offsets are converted with to_constant () below, which
     is only valid for compile-time constant values.  Do not wrap anything
     separately if either offset is not constant.  */
  if (!cfun->machine->frame.gp_sp_offset.is_constant ()
      || !cfun->machine->frame.fp_sp_offset.is_constant ())
    return components;

  offset = cfun->machine->frame.gp_sp_offset.to_constant ();
  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
      {
        /* We can only wrap registers that have small operand offsets.
           For large offsets a pseudo register might be needed which
           cannot be created during the shrink wrapping pass.  */
        if (SMALL_OPERAND (offset))
          bitmap_set_bit (components, regno);

        /* Every saved GPR occupies one word, whether or not it is
           wrapped separately.  */
        offset -= UNITS_PER_WORD;
      }

  offset = cfun->machine->frame.fp_sp_offset.to_constant ();
  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
      {
        machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;

        /* We can only wrap registers that have small operand offsets.
           For large offsets a pseudo register might be needed which
           cannot be created during the shrink wrapping pass.  */
        if (SMALL_OPERAND (offset))
          bitmap_set_bit (components, regno);

        offset -= GET_MODE_SIZE (mode).to_constant ();
      }

  /* Don't mess with the hard frame pointer.  */
  if (frame_pointer_needed)
    bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);

  /* The return address register is never wrapped separately.  */
  bitmap_clear_bit (components, RETURN_ADDR_REGNUM);

  return components;
}
/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.

   Return a newly allocated sbitmap, indexed by hard register number, of
   the GPR and FPR components that BB needs.  */

static sbitmap
riscv_components_for_bb (basic_block bb)
{
  bitmap live_in = DF_LIVE_IN (bb);
  bitmap live_gen = &DF_LIVE_BB_INFO (bb)->gen;
  bitmap live_kill = &DF_LIVE_BB_INFO (bb)->kill;

  sbitmap needed = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
  bitmap_clear (needed);

  /* Note the ABI of every call in BB, then ask the aggregator for the
     caller-save registers relative to the current function's ABI.  */
  function_abi_aggregator callee_abis;
  rtx_insn *insn;
  FOR_BB_INSNS (bb, insn)
    {
      if (CALL_P (insn))
        callee_abis.note_callee_abi (insn_callee_abi (insn));
    }
  HARD_REG_SET extra_caller_saves
    = callee_abis.caller_save_regs (*crtl->abi);

  /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets.
     Fixed registers and registers the current ABI fully clobbers are
     never components.  */
  for (unsigned int gpr = GP_REG_FIRST; gpr <= GP_REG_LAST; gpr++)
    {
      if (fixed_regs[gpr] || crtl->abi->clobbers_full_reg_p (gpr))
        continue;

      if (TEST_HARD_REG_BIT (extra_caller_saves, gpr)
          || bitmap_bit_p (live_in, gpr)
          || bitmap_bit_p (live_gen, gpr)
          || bitmap_bit_p (live_kill, gpr))
        bitmap_set_bit (needed, gpr);
    }

  /* The same test applies to the FPRs.  */
  for (unsigned int fpr = FP_REG_FIRST; fpr <= FP_REG_LAST; fpr++)
    {
      if (fixed_regs[fpr] || crtl->abi->clobbers_full_reg_p (fpr))
        continue;

      if (TEST_HARD_REG_BIT (extra_caller_saves, fpr)
          || bitmap_bit_p (live_in, fpr)
          || bitmap_bit_p (live_gen, fpr)
          || bitmap_bit_p (live_kill, fpr))
        bitmap_set_bit (needed, fpr);
    }

  return needed;
}
/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.  All four arguments
   are left unnamed and ignored; the function modifies nothing.  */
static void
riscv_disqualify_components (sbitmap, edge, sbitmap, bool)
{
/* Nothing to do for riscv. */
}
/* Helper for the prologue/epilogue component hooks.  Walk the saved GPRs
   and then the saved FPRs in the same order, and with the same offset
   bookkeeping, as the frame masks describe; for every saved register
   whose bit is set in COMPONENTS call riscv_save_restore_reg with
   riscv_save_reg if PROLOGUE_P, or riscv_restore_reg otherwise.  */

static void
riscv_process_components (sbitmap components, bool prologue_p)
{
  riscv_save_restore_fn fn;
  if (prologue_p)
    fn = riscv_save_reg;
  else
    fn = riscv_restore_reg;

  HOST_WIDE_INT slot = cfun->machine->frame.gp_sp_offset.to_constant ();
  for (unsigned int gpr = GP_REG_FIRST; gpr <= GP_REG_LAST; gpr++)
    {
      if (!BITSET_P (cfun->machine->frame.mask, gpr - GP_REG_FIRST))
        continue;

      if (bitmap_bit_p (components, gpr))
        riscv_save_restore_reg (word_mode, gpr, slot, fn);

      /* Each saved GPR owns a word-sized slot even when it is not part of
         COMPONENTS, so always step past it.  */
      slot -= UNITS_PER_WORD;
    }

  slot = cfun->machine->frame.fp_sp_offset.to_constant ();
  const machine_mode fp_mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
  for (unsigned int fpr = FP_REG_FIRST; fpr <= FP_REG_LAST; fpr++)
    {
      if (!BITSET_P (cfun->machine->frame.fmask, fpr - FP_REG_FIRST))
        continue;

      if (bitmap_bit_p (components, fpr))
        riscv_save_restore_reg (fp_mode, fpr, slot, fn);

      slot -= GET_MODE_SIZE (fp_mode).to_constant ();
    }
}
/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS.  Forward COMPONENTS
   to riscv_process_components with its prologue flag set to true.  */
static void
riscv_emit_prologue_components (sbitmap components)
{
riscv_process_components (components, true);
}
/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS.  Forward COMPONENTS
   to riscv_process_components with its prologue flag set to false.  */
static void
riscv_emit_epilogue_components (sbitmap components)
{
riscv_process_components (components, false);
}
/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS.  Mark every GPR and
   FPR whose bit is set in COMPONENTS in the current function's
   reg_is_wrapped_separately array.  Entries are only ever set to true
   here, never cleared.  */

static void
riscv_set_handled_components (sbitmap components)
{
  bool *wrapped = cfun->machine->reg_is_wrapped_separately;

  for (unsigned int gpr = GP_REG_FIRST; gpr <= GP_REG_LAST; gpr++)
    {
      if (!bitmap_bit_p (components, gpr))
        continue;
      wrapped[gpr] = true;
    }

  for (unsigned int fpr = FP_REG_FIRST; fpr <= FP_REG_LAST; fpr++)
    {
      if (!bitmap_bit_p (components, fpr))
        continue;
      wrapped[fpr] = true;
    }
}
/* Return nonzero if this function is known to have a null epilogue.
This allows the optimizer to omit jumps to jumps if no stack
was created. */
@ -6684,6 +6849,30 @@ riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary
/* Target hooks for separate shrink-wrapping.  Each TARGET_SHRINK_WRAP_*
   macro below is redefined to the riscv_* function named on the line
   that follows it.  */
#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
riscv_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
riscv_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
riscv_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
riscv_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
riscv_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
riscv_set_handled_components
/* The generic ELF target does not always have TLS support. */
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS

View File

@ -0,0 +1,24 @@
/* { dg-do compile } */
/* { dg-options "-fshrink-wrap" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" } } */
/* Declared but not defined in this file; called only on the X != 0 path.  */
void g(void);
/* Test body: all register pressure and the call to g sit inside the
   `if (x)` branch, so the X == 0 path has no work to do.  */
void f(int x)
{
if (x)
{
/* Force saving of some callee-saved registers.  With shrink wrapping
   enabled these only need to be saved if x is non-zero.  The variables
   are bound to the registers named "18", "19" and "20".
   NOTE(review): presumably these are x18-x20, i.e. s2-s4 as the variable
   names suggest -- confirm against the RISC-V register naming.  */
register int s2 asm("18") = x;
register int s3 asm("19") = x;
register int s4 asm("20") = x;
/* Empty asm statements that take each variable as an "r" input operand,
   so each register variable has a use.  */
asm("" : : "r"(s2));
asm("" : : "r"(s3));
asm("" : : "r"(s4));
g();
}
}
/* The resulting code should do nothing if X is 0. */
/* { dg-final { scan-assembler "bne\ta0,zero,.*\n.*ret" } } */