s390.c (MIN_UNROLL_PROBES): Define.

	* config/s390/s390.c (MIN_UNROLL_PROBES): Define.
	(allocate_stack_space): New function, partially extracted from
	s390_emit_prologue.
	(s390_emit_prologue): Track offset to most recent stack probe.
	Code to allocate space moved into allocate_stack_space.
	Dump actions when no stack is allocated.
	(s390_prologue_plus_offset): New function.
	(s390_emit_stack_probe): Likewise.

	* gcc.dg/stack-check-5.c: Add argument for s390.
	* lib/target-supports.exp
	(check_effective_target_supports_stack_clash_protection): Enable for
	s390/s390x targets.

Co-Authored-By: Jeff Law <law@redhat.com>

From-SVN: r253049
commit d3347cd287 (parent 12f713131e)
Andreas Krebbel, 2017-09-21 04:30:16 +00:00; committed by Jeff Law
5 changed files with 261 additions and 29 deletions

gcc/ChangeLog

@@ -1,3 +1,15 @@
2017-09-20  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
	    Jeff Law  <law@redhat.com>

	* config/s390/s390.c (MIN_UNROLL_PROBES): Define.
	(allocate_stack_space): New function, partially extracted from
	s390_emit_prologue.
	(s390_emit_prologue): Track offset to most recent stack probe.
	Code to allocate space moved into allocate_stack_space.
	Dump actions when no stack is allocated.
	(s390_prologue_plus_offset): New function.
	(s390_emit_stack_probe): Likewise.

2017-09-20  Alexandre Oliva  <aoliva@redhat.com>

	* common.opt (Wa, Wl, Wp, g, gz=): Add
gcc/config/s390/s390.c

@@ -11155,6 +11155,183 @@ pass_s390_early_mach::execute (function *fun)
} // anon namespace
/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
   - push too big immediates to the literal pool and annotate the refs
   - emit frame related notes for stack pointer changes.  */

static rtx
s390_prologue_plus_offset (rtx target, rtx reg, rtx offset,
                           bool frame_related_p)
{
  rtx insn;
  rtx orig_offset = offset;

  gcc_assert (REG_P (target));
  gcc_assert (REG_P (reg));
  gcc_assert (CONST_INT_P (offset));

  if (offset == const0_rtx)                               /* lr/lgr */
    {
      insn = emit_move_insn (target, reg);
    }
  else if (DISP_IN_RANGE (INTVAL (offset)))               /* la */
    {
      insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
                                                   offset));
    }
  else
    {
      if (!satisfies_constraint_K (offset)                /* ahi/aghi */
          && (!TARGET_EXTIMM
              || (!satisfies_constraint_Op (offset)       /* alfi/algfi */
                  && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
        offset = force_const_mem (Pmode, offset);

      if (target != reg)
        {
          insn = emit_move_insn (target, reg);
          RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
        }

      insn = emit_insn (gen_add2_insn (target, offset));

      if (!CONST_INT_P (offset))
        {
          annotate_constant_pool_refs (&PATTERN (insn));

          if (frame_related_p)
            add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                          gen_rtx_SET (target,
                                       gen_rtx_PLUS (Pmode, target,
                                                     orig_offset)));
        }
    }

  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;

  /* If this is a stack adjustment and we are generating a stack clash
     prologue, then add a REG_STACK_CHECK note to signal that this insn
     should be left alone.  */
  if (flag_stack_clash_protection && target == stack_pointer_rtx)
    add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

  return insn;
}
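To see which instruction each offset range selects, here is a minimal standalone sketch; the ranges are assumptions about the s390 constraints (DISP_IN_RANGE with long displacement, K, and Op/On under TARGET_EXTIMM), not the backend macros themselves:

#include <stdio.h>

/* Illustrative only.  Assumed ranges: DISP_IN_RANGE is
   [-524288, 524287] with long displacement (else [0, 4095]), K is a
   signed 16-bit immediate (ahi/aghi), Op/On are the 32-bit
   add/subtract-logical immediates (alfi/algfi, slfi/slgfi).  */
static const char *
offset_strategy (long long off, int long_displacement)
{
  long long disp_lo = long_displacement ? -524288 : 0;
  long long disp_hi = long_displacement ? 524287 : 4095;

  if (off == 0)
    return "lr/lgr";             /* plain register move */
  if (off >= disp_lo && off <= disp_hi)
    return "la";                 /* load address */
  if (off >= -32768 && off <= 32767)
    return "ahi/aghi";           /* add halfword immediate */
  if (off > 0 && off <= 0xffffffffLL)
    return "alfi/algfi";         /* add logical immediate */
  if (off < 0 && -off <= 0xffffffffLL)
    return "slfi/slgfi";         /* subtract logical immediate */
  return "literal pool + add";   /* the force_const_mem path */
}

int
main (void)
{
  printf ("%s\n", offset_strategy (-4096, 1));        /* la */
  printf ("%s\n", offset_strategy (-1048576, 1));     /* slfi/slgfi */
  printf ("%s\n", offset_strategy (-(1LL << 33), 1)); /* literal pool */
  return 0;
}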
/* Emit a compare instruction with a volatile memory access as stack
   probe.  It does not waste store tags and does not clobber any
   registers apart from the condition code.  */

static void
s390_emit_stack_probe (rtx addr)
{
  rtx tmp = gen_rtx_MEM (Pmode, addr);
  MEM_VOLATILE_P (tmp) = 1;
  s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
  emit_insn (gen_blockage ());
}
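As a rough user-level analogy (not the backend code): the probe behaves like a volatile load from the probe address - the access must happen, so a guard page would trap, yet nothing is stored and no general-purpose register changes:

#include <stdio.h>

/* Analogy only: a volatile read forces the page to be touched without
   writing anything, mirroring the compare-against-volatile-MEM probe
   emitted above.  */
static void
probe_page_analogy (const char *addr)
{
  *(volatile const char *) addr;
}

int
main (void)
{
  char page[4096];
  probe_page_analogy (page + sizeof page - 1);
  puts ("probed");
  return 0;
}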
/* Use a runtime loop if we have to emit more probes than this.  */
#define MIN_UNROLL_PROBES 3

/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
   probe relative to the stack pointer.

   Note that SIZE is negative.

   The return value is true if TEMP_REG has been clobbered.  */
static bool
allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
                      rtx temp_reg)
{
  bool temp_reg_clobbered_p = false;
  HOST_WIDE_INT probe_interval
    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
  HOST_WIDE_INT guard_size
    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);

  if (flag_stack_clash_protection)
    {
      if (last_probe_offset + -INTVAL (size) < guard_size)
        dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
        {
          rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
          HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
          HOST_WIDE_INT num_probes = rounded_size / probe_interval;
          HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;

          if (num_probes < MIN_UNROLL_PROBES)
            {
              /* Emit unrolled probe statements.  */
              for (unsigned int i = 0; i < num_probes; i++)
                {
                  s390_prologue_plus_offset (stack_pointer_rtx,
                                             stack_pointer_rtx,
                                             GEN_INT (-probe_interval), true);
                  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       offset));
                }
              dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
            }
          else
            {
              /* Emit a loop probing the pages.  */
              rtx_code_label *loop_start_label = gen_label_rtx ();

              /* From now on temp_reg will be the CFA register.  */
              s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
                                         GEN_INT (-rounded_size), true);
              emit_label (loop_start_label);

              s390_prologue_plus_offset (stack_pointer_rtx,
                                         stack_pointer_rtx,
                                         GEN_INT (-probe_interval), false);
              s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                   stack_pointer_rtx,
                                                   offset));
              emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
                                       GT, NULL_RTX,
                                       Pmode, 1, loop_start_label);

              /* Without this make_edges ICEes.  */
              JUMP_LABEL (get_last_insn ()) = loop_start_label;
              LABEL_NUSES (loop_start_label) = 1;

              /* That's going to be a NOP since stack pointer and
                 temp_reg are supposed to be the same here.  We just
                 emit it to set the CFA reg back to r15.  */
              s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
                                         const0_rtx, true);
              temp_reg_clobbered_p = true;
              dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
            }

          /* Handle any residual allocation request.  */
          s390_prologue_plus_offset (stack_pointer_rtx,
                                     stack_pointer_rtx,
                                     GEN_INT (-residual), true);
          last_probe_offset += residual;
          if (last_probe_offset >= probe_interval)
            s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                 stack_pointer_rtx,
                                                 GEN_INT (residual
                                                          - UNITS_PER_LONG)));

          return temp_reg_clobbered_p;
        }
    }

  /* Subtract frame size from stack pointer.  */
  s390_prologue_plus_offset (stack_pointer_rtx,
                             stack_pointer_rtx,
                             size, true);

  return temp_reg_clobbered_p;
}
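A worked example of the probing arithmetic above, assuming the default 4 KiB probe interval; the frame size is hypothetical:

#include <stdio.h>

/* Sketch of the rounded_size/num_probes/residual computation,
   assuming the probe-interval parameter yields 12 (4 KiB).  SIZE is
   kept positive here for readability; the backend passes it negated.  */
int
main (void)
{
  const long probe_interval = 1L << 12;
  const long size = 10000;                          /* example frame */

  long rounded_size = size & -probe_interval;       /* 8192 */
  long num_probes = rounded_size / probe_interval;  /* 2 */
  long residual = size - rounded_size;              /* 1808 */

  /* With MIN_UNROLL_PROBES == 3, two probes are emitted unrolled;
     a 16 KiB frame (4 probes) would use the runtime loop instead.  */
  printf ("rounded=%ld probes=%ld residual=%ld\n",
          rounded_size, num_probes, residual);
  return 0;
}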
/* Expand the prologue into a bunch of separate insns. */
void
@@ -11179,6 +11356,19 @@ s390_emit_prologue (void)
  else
    temp_reg = gen_rtx_REG (Pmode, 1);

  /* When probing for stack-clash mitigation, we have to track the distance
     between the stack pointer and closest known reference.

     Most of the time we have to make a worst case assumption.  The
     only exception is when TARGET_BACKCHAIN is active, in which case
     we know *sp (offset 0) was written.  */
  HOST_WIDE_INT probe_interval
    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
  HOST_WIDE_INT last_probe_offset
    = (TARGET_BACKCHAIN
       ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
       : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
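Plugging in typical s390x values (an assumption: 4 KiB probe interval, STACK_BOUNDARY of 64 bits, UNITS_PER_WORD of 8 bytes) gives a worst-case initial distance of 4088 bytes without backchain, and 0 with it:

#include <stdio.h>

/* Worked numbers for the initialisation above, under the assumed
   s390x defaults stated in the lead-in.  */
int
main (void)
{
  const long probe_interval = 1L << 12;

  /* No backchain: the caller's closest known reference may be almost
     a full interval above the incoming stack pointer.  */
  long no_backchain = probe_interval - 64 / 8;   /* 4088 */

  /* TARGET_BACKCHAIN without packed stack: *sp was written.  */
  long with_backchain = 0;

  printf ("%ld %ld\n", no_backchain, with_backchain);
  return 0;
}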
  s390_save_gprs_to_fprs ();

  /* Save call saved gprs.  */
@@ -11190,6 +11380,14 @@ s390_emit_prologue (void)
				   - cfun_frame_layout.first_save_gpr_slot),
				  cfun_frame_layout.first_save_gpr,
				  cfun_frame_layout.last_save_gpr);

      /* This is not 100% correct.  If we have more than one register saved,
	 then LAST_PROBE_OFFSET can move even closer to sp.  */
      last_probe_offset
	= (cfun_frame_layout.gprs_offset +
	   UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
			     - cfun_frame_layout.first_save_gpr_slot));

      emit_insn (insn);
    }
@@ -11206,6 +11404,8 @@ s390_emit_prologue (void)
	if (cfun_fpr_save_p (i))
	  {
	    save_fpr (stack_pointer_rtx, offset, i);
	    if (offset < last_probe_offset)
	      last_probe_offset = offset;
	    offset += 8;
	  }
	else if (!TARGET_PACKED_STACK || cfun->stdarg)
@@ -11219,6 +11419,8 @@ s390_emit_prologue (void)
	if (cfun_fpr_save_p (i))
	  {
	    insn = save_fpr (stack_pointer_rtx, offset, i);
	    if (offset < last_probe_offset)
	      last_probe_offset = offset;
	    offset += 8;

	    /* If f4 and f6 are call clobbered they are saved due to
@@ -11241,6 +11443,8 @@ s390_emit_prologue (void)
	if (cfun_fpr_save_p (i))
	  {
	    insn = save_fpr (stack_pointer_rtx, offset, i);
	    if (offset < last_probe_offset)
	      last_probe_offset = offset;

	    RTX_FRAME_RELATED_P (insn) = 1;
	    offset -= 8;
@@ -11260,10 +11464,11 @@ s390_emit_prologue (void)
  if (cfun_frame_layout.frame_size > 0)
    {
      rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
      rtx real_frame_off;
      rtx_insn *stack_pointer_backup_loc;
      bool temp_reg_clobbered_p;

      if (s390_stack_size)
	{
	  HOST_WIDE_INT stack_guard;

	  if (s390_stack_guard)
@@ -11329,34 +11534,35 @@ s390_emit_prologue (void)
      if (s390_warn_dynamicstack_p && cfun->calls_alloca)
	warning (0, "%qs uses dynamic stack allocation", current_function_name ());

      /* Save incoming stack pointer into temp reg.  */
      /* Save the location where we could backup the incoming stack
	 pointer.  */
      stack_pointer_backup_loc = get_last_insn ();

      temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
						   temp_reg);

      if (TARGET_BACKCHAIN || next_fpr)
	insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));

      /* Subtract frame size from stack pointer.  */
      if (DISP_IN_RANGE (INTVAL (frame_off)))
	{
	  insn = gen_rtx_SET (stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    frame_off));
	  insn = emit_insn (insn);

	  if (temp_reg_clobbered_p)
	    {
	      /* allocate_stack_space had to make use of temp_reg and
		 we need it to hold a backup of the incoming stack
		 pointer.  Calculate back that value from the current
		 stack pointer.  */
	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
					 GEN_INT (cfun_frame_layout.frame_size),
					 false);
	    }
	  else
	    {
	      /* allocate_stack_space didn't actually require
		 temp_reg.  Insert the stack pointer backup insn
		 before the stack pointer decrement code - knowing now
		 that the value will survive.  */
	      emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
			       stack_pointer_backup_loc);
	    }
	}
      else
	{
	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
	    frame_off = force_const_mem (Pmode, frame_off);

	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
	  annotate_constant_pool_refs (&PATTERN (insn));
	}

      RTX_FRAME_RELATED_P (insn) = 1;
      real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		    gen_rtx_SET (stack_pointer_rtx,
				 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					       real_frame_off)));

      /* Set backchain.  */
@@ -11381,6 +11587,8 @@ s390_emit_prologue (void)
	  emit_clobber (addr);
	}
    }
  else if (flag_stack_clash_protection)
    dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);

  /* Save fprs 8 - 15 (64 bit ABI).  */

gcc/testsuite/ChangeLog

@@ -1,3 +1,11 @@
2017-09-20  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
	    Jeff Law  <law@redhat.com>

	* gcc.dg/stack-check-5.c: Add argument for s390.
	* lib/target-supports.exp
	(check_effective_target_supports_stack_clash_protection): Enable for
	s390/s390x targets.

2017-09-20  Martin Sebor  <msebor@redhat.com>

	PR c/81854
gcc/testsuite/gcc.dg/stack-check-5.c

@@ -3,6 +3,10 @@
/* { dg-require-effective-target supports_stack_clash_protection } */
/* Otherwise the S/390 back-end might save the stack pointer in f2 ()
into an FPR. */
/* { dg-additional-options "-msoft-float" { target { s390x-*-* } } } */
extern void foo (char *);
extern void bar (void);

gcc/testsuite/lib/target-supports.exp

@@ -8640,12 +8640,12 @@ proc check_effective_target_supports_stack_clash_protection { } {
    # Temporary until the target bits are fully ACK'd.
#    if { [istarget aarch*-*-*]
#	 || [istarget s390*-*-*]
#	 || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } {
#	return 1
#    }

    if { [istarget x86_64-*-*] || [istarget i?86-*-*] } {
    if { [istarget x86_64-*-*] || [istarget i?86-*-*]
	 || [istarget s390*-*-*] } {
	return 1
    }

    return 0