Ensure that the outgoing argument size is at least 8 bytes when using alloca and stack-clash protection.

This patch adds a requirement that the size of the outgoing arguments area
for a function is at least 8 bytes when using stack-clash protection and alloca.

By using this condition we can avoid a check in the alloca code and so have
smaller and simpler code there.

A simplified version of the AArch64 stack frames is:

   +-----------------------+                                              
   |                       |                                                 
   |                       |                                              
   |                       |                                              
   +-----------------------+                                              
   |LR                     |                                              
   +-----------------------+                                              
   |FP                     |                                              
   +-----------------------+                                              
   |dynamic allocations    | ----  expanding area which will push the outgoing
   +-----------------------+       args down during each allocation.
   |padding                |
   +-----------------------+
   |outgoing stack args    | ---- safety buffer of 8 bytes (aligned)
   +-----------------------+

By always defining an outgoing argument, alloca(0) effectively is safe to probe
at $sp due to the reserved buffer being there.  It will never corrupt the stack.

This is also safe for alloca(x) where x is 0 or x % page_size == 0.  In the
former it is the same case as alloca(0) while the latter is safe because any
allocation pushes the outgoing stack args down:

   |FP                     |                                              
   +-----------------------+                                              
   |                       |
   |dynamic allocations    | ----  alloca (x)
   |                       |
   +-----------------------+
   |padding                |
   +-----------------------+
   |outgoing stack args    | ---- safety buffer of 8 bytes (aligned)
   +-----------------------+

Which means when you probe for the residual, if it's 0 you'll again just probe
in the outgoing stack args range, which we know is non-zero (at least 8 bytes).

gcc/

	PR target/86486
	* config/aarch64/aarch64.h (STACK_CLASH_MIN_BYTES_OUTGOING_ARGS,
	STACK_DYNAMIC_OFFSET): New.
	* config/aarch64/aarch64.c (aarch64_layout_frame):
	Update outgoing args size.
	(aarch64_stack_clash_protection_alloca_probe_range,
	TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE): New.

gcc/testsuite/

	PR target/86486
	* gcc.target/aarch64/stack-check-alloca-1.c: New.
	* gcc.target/aarch64/stack-check-alloca-10.c: New.
	* gcc.target/aarch64/stack-check-alloca-2.c: New.
	* gcc.target/aarch64/stack-check-alloca-3.c: New.
	* gcc.target/aarch64/stack-check-alloca-4.c: New.
	* gcc.target/aarch64/stack-check-alloca-5.c: New.
	* gcc.target/aarch64/stack-check-alloca-6.c: New.
	* gcc.target/aarch64/stack-check-alloca-7.c: New.
	* gcc.target/aarch64/stack-check-alloca-8.c: New.
	* gcc.target/aarch64/stack-check-alloca-9.c: New.
	* gcc.target/aarch64/stack-check-alloca.h: New.
	* gcc.target/aarch64/stack-check-14.c: New.
	* gcc.target/aarch64/stack-check-15.c: New.

From-SVN: r264751
This commit is contained in:
Tamar Christina 2018-10-01 13:00:58 +00:00 committed by Tamar Christina
parent 2c25083e75
commit 8c6e3b2355
17 changed files with 235 additions and 0 deletions

View File

@ -1,3 +1,13 @@
2018-10-01 Tamar Christina <tamar.christina@arm.com>
PR target/86486
* config/aarch64/aarch64.h (STACK_CLASH_MIN_BYTES_OUTGOING_ARGS,
STACK_DYNAMIC_OFFSET): New.
* config/aarch64/aarch64.c (aarch64_layout_frame):
Update outgoing args size.
(aarch64_stack_clash_protection_alloca_probe_range,
TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE): New.
2018-10-01 Tamar Christina <tamar.christina@arm.com>
PR target/86486

View File

@ -4129,6 +4129,10 @@ aarch64_layout_frame (void)
cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
/* Adjust the outgoing arguments size if required. Keep it in sync with what
the mid-end is doing. */
crtl->outgoing_args_size = STACK_DYNAMIC_OFFSET (cfun);
#define SLOT_NOT_REQUIRED (-2)
#define SLOT_REQUIRED (-1)
@ -4899,6 +4903,16 @@ aarch64_set_handled_components (sbitmap components)
cfun->machine->reg_is_wrapped_separately[regno] = true;
}
/* AArch64 has an ABI-defined safe buffer.  Return STACK_CLASH_CALLER_GUARD,
   the constant used to determine the probe offset for alloca.  */
static HOST_WIDE_INT
aarch64_stack_clash_protection_alloca_probe_range (void)
{
  const HOST_WIDE_INT probe_range = STACK_CLASH_CALLER_GUARD;
  return probe_range;
}
/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
registers. If POLY_SIZE is not large enough to require a probe this function
will only adjust the stack. When allocating the stack space
@ -18413,6 +18427,10 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
#undef TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE
#define TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE \
aarch64_stack_clash_protection_alloca_probe_range
#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes

View File

@ -88,6 +88,10 @@
before probing has to be done for stack clash protection. */
#define STACK_CLASH_CALLER_GUARD 1024
/* This value represents the minimum number of bytes we expect the function's
outgoing arguments to be when stack-clash is enabled. */
#define STACK_CLASH_MIN_BYTES_OUTGOING_ARGS 8
/* This value controls how many pages we manually unroll the loop for when
generating stack clash probes. */
#define STACK_CLASH_MAX_UNROLL_PAGES 4
@ -1076,4 +1080,17 @@ extern poly_uint16 aarch64_sve_vg;
#define REGMODE_NATURAL_SIZE(MODE) aarch64_regmode_natural_size (MODE)
/* Allocate a minimum of STACK_CLASH_MIN_BYTES_OUTGOING_ARGS bytes for the
outgoing arguments if stack clash protection is enabled. This is essential
as the extra arg space allows us to skip a check in alloca.

When flag_stack_clash_protection is set, the current function calls alloca
and crtl->outgoing_args_size is known to be less than
STACK_CLASH_MIN_BYTES_OUTGOING_ARGS, the macro evaluates to that minimum
passed through ROUND_UP with STACK_BOUNDARY / BITS_PER_UNIT. Otherwise it
evaluates to crtl->outgoing_args_size + STACK_POINTER_OFFSET.

Note that the FUNDECL argument is not used by the expansion; it reads cfun
and crtl directly. */
#undef STACK_DYNAMIC_OFFSET
#define STACK_DYNAMIC_OFFSET(FUNDECL) \
((flag_stack_clash_protection \
&& cfun->calls_alloca \
&& known_lt (crtl->outgoing_args_size, \
STACK_CLASH_MIN_BYTES_OUTGOING_ARGS)) \
? ROUND_UP (STACK_CLASH_MIN_BYTES_OUTGOING_ARGS, \
STACK_BOUNDARY / BITS_PER_UNIT) \
: (crtl->outgoing_args_size + STACK_POINTER_OFFSET))
#endif /* GCC_AARCH64_H */

View File

@ -1,3 +1,20 @@
2018-10-01 Tamar Christina <tamar.christina@arm.com>
PR target/86486
* gcc.target/aarch64/stack-check-alloca-1.c: New.
* gcc.target/aarch64/stack-check-alloca-10.c: New.
* gcc.target/aarch64/stack-check-alloca-2.c: New.
* gcc.target/aarch64/stack-check-alloca-3.c: New.
* gcc.target/aarch64/stack-check-alloca-4.c: New.
* gcc.target/aarch64/stack-check-alloca-5.c: New.
* gcc.target/aarch64/stack-check-alloca-6.c: New.
* gcc.target/aarch64/stack-check-alloca-7.c: New.
* gcc.target/aarch64/stack-check-alloca-8.c: New.
* gcc.target/aarch64/stack-check-alloca-9.c: New.
* gcc.target/aarch64/stack-check-alloca.h: New.
* gcc.target/aarch64/stack-check-14.c: New.
* gcc.target/aarch64/stack-check-15.c: New.
2018-10-01 Tamar Christina <tamar.christina@arm.com>
PR target/86486

View File

@ -0,0 +1,24 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
/* Declared only; t2 calls it and uses the result to index the alloca'd
buffer. */
int t1(int);
/* Allocate a constant 4050 bytes with alloca, call t1 and return the byte of
the buffer selected by t1's result. */
int t2(int x)
{
char *p = __builtin_alloca (4050);
x = t1 (x);
return p[x];
}
/* This test has a constant sized alloca that is smaller than the
probe interval. Only one probe is required since the value is larger
than 1024 bytes but smaller than 63k.
The form can change quite a bit so we just check for one
probe without looking at the actual address. */
/* { dg-final { scan-assembler-times "str\\txzr," 1 } } */

View File

@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
/* Declared only; t2 calls it and uses the result to index the alloca'd
buffer. */
int t1(int);
/* Allocate X bytes with alloca (size not known at compile time), call t1 and
return the byte of the buffer selected by t1's result. */
int t2(int x)
{
char *p = __builtin_alloca (x);
x = t1 (x);
return p[x];
}
/* This test has a variable sized alloca. It requires 3 probes:
one in the loop and two for the residual, one for when it's < 1024 and one
for when it's not.
The form can change quite a bit so we just check for three
probes without looking at the actual address. */
/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */

View File

@ -0,0 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE y
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 2 } } */
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp\]} 1 } } */
/* Dynamic alloca, expect loop, and 2 probes with 1kB offset and 1 at sp.
1st probe is inside the loop for the full guard-size allocations, second
probe is for the case where residual is zero and the final probe for when
residual is > 1024 bytes. */

View File

@ -0,0 +1,12 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 127.5 * 64 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 2 } } */
/* Large alloca of an amount which isn't a multiple of a guard-size, and
residual is more than 1kB. Loop expected with one 1kB probe offset and
one residual probe at offset 1kB. */

View File

@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 0
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-not {str\s+xzr,} } } */
/* Alloca of 0 should emit no probes, boundary condition. */

View File

@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 100
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 8\]} 1 } } */
/* Alloca is less than 1kB, 1 probe expected at word offset. */

View File

@ -0,0 +1,11 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 2 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* Alloca is more than 1kB, but less than guard-size, 1 probe expected at
1kB offset. */

View File

@ -0,0 +1,11 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 63 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* Alloca is more than 1kB, but less than guard-size, 1 probe expected at
1kB offset. */

View File

@ -0,0 +1,11 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 63.5 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* Alloca is more than 1kB, but less than guard-size, 1 probe expected at 1kB
offset. */

View File

@ -0,0 +1,11 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 64 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* Alloca is exactly one guard-size, 1 probe expected at 1kB offset.
Boundary condition. */

View File

@ -0,0 +1,13 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 65 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 8\]} 1 } } */
/* Alloca is more than one guard-page, and residual is exactly 1kB. 2 probes
expected. One at 1kB offset for the guard-size allocation and one at word
offset for the residual. */

View File

@ -0,0 +1,12 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 127 * 64 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* Large alloca of a constant amount which is a multiple of a guard-size,
no residual. Loop expected with one 1kB probe offset and no residual probe
because residual is at compile time known to be zero. */

View File

@ -0,0 +1,13 @@
#include <alloca.h>
/* Store a NUL byte at PTR[Y]. Marked noinline and noipa -- presumably so
the caller's alloca'd buffer escapes through a real call; confirm against
the tests that include this header. */
__attribute__((noinline, noipa))
void g (char* ptr, int y)
{
ptr[y] = '\0';
}
/* Allocate SIZE bytes with alloca and pass the buffer together with Y to g.
SIZE must be defined by the file that includes this header; it may be a
constant expression or refer to the parameter Y. */
void f_caller (int y)
{
char* pStr = alloca(SIZE);
g (pStr, y);
}