diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a7b22a83fcfe..cacb95c58bf4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2018-01-10 H.J. Lu + + PR target/83735 + * config/i386/i386.c (ix86_compute_frame_layout): Always adjust + stack_realign_offset for the largest alignment of stack slot + actually used. + (ix86_find_max_used_stack_alignment): New function. + (ix86_finalize_stack_frame_flags): Use it. Set + max_used_stack_alignment if we don't realign stack. + * config/i386/i386.h (machine_function): Add + max_used_stack_alignment. + 2018-01-10 Christophe Lyon * config/arm/arm.opt (-mbranch-cost): New option. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d10b1371bcca..5e17b694d7fa 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -11259,7 +11259,11 @@ ix86_compute_frame_layout (void) /* Calculate the size of the va-arg area (not including padding, if any). */ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; - if (stack_realign_fp) + /* Also adjust stack_realign_offset for the largest alignment of + stack slot actually used. */ + if (stack_realign_fp + || (cfun->machine->max_used_stack_alignment != 0 + && (offset % cfun->machine->max_used_stack_alignment) != 0)) { /* We may need a 16-byte aligned stack for the remainder of the register save area, but the stack frame for the local function @@ -12668,6 +12672,62 @@ output_probe_stack_range (rtx reg, rtx end) return ""; } +/* Return true if stack frame is required. Update STACK_ALIGNMENT + to the largest alignment, in bits, of stack slot used if stack + frame is required and CHECK_STACK_SLOT is true. */ + +static bool +ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, + bool check_stack_slot) +{ + HARD_REG_SET set_up_by_prologue, prologue_used; + basic_block bb; + + CLEAR_HARD_REG_SET (prologue_used); + CLEAR_HARD_REG_SET (set_up_by_prologue); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, + HARD_FRAME_POINTER_REGNUM); + + /* The preferred stack alignment is the minimum stack alignment. */ + if (stack_alignment > crtl->preferred_stack_boundary) + stack_alignment = crtl->preferred_stack_boundary; + + bool require_stack_frame = false; + + FOR_EACH_BB_FN (bb, cfun) + { + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn) + && requires_stack_frame_p (insn, prologue_used, + set_up_by_prologue)) + { + require_stack_frame = true; + + if (check_stack_slot) + { + /* Find the maximum stack alignment. */ + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) + if (MEM_P (*iter) + && (reg_mentioned_p (stack_pointer_rtx, + *iter) + || reg_mentioned_p (frame_pointer_rtx, + *iter))) + { + unsigned int alignment = MEM_ALIGN (*iter); + if (alignment > stack_alignment) + stack_alignment = alignment; + } + } + } + } + + return require_stack_frame; +} + /* Finalize stack_realign_needed and frame_pointer_needed flags, which will guide prologue/epilogue to be generated in correct form. */ @@ -12718,52 +12778,8 @@ ix86_finalize_stack_frame_flags (void) && ix86_nsaved_sseregs () == 0 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) { - HARD_REG_SET set_up_by_prologue, prologue_used; - basic_block bb; - - CLEAR_HARD_REG_SET (prologue_used); - CLEAR_HARD_REG_SET (set_up_by_prologue); - add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); - add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); - add_to_hard_reg_set (&set_up_by_prologue, Pmode, - HARD_FRAME_POINTER_REGNUM); - - /* The preferred stack alignment is the minimum stack alignment. */ - if (stack_alignment > crtl->preferred_stack_boundary) - stack_alignment = crtl->preferred_stack_boundary; - - bool require_stack_frame = false; - - FOR_EACH_BB_FN (bb, cfun) - { - rtx_insn *insn; - FOR_BB_INSNS (bb, insn) - if (NONDEBUG_INSN_P (insn) - && requires_stack_frame_p (insn, prologue_used, - set_up_by_prologue)) - { - require_stack_frame = true; - - if (stack_realign) - { - /* Find the maximum stack alignment. */ - subrtx_iterator::array_type array; - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) - if (MEM_P (*iter) - && (reg_mentioned_p (stack_pointer_rtx, - *iter) - || reg_mentioned_p (frame_pointer_rtx, - *iter))) - { - unsigned int alignment = MEM_ALIGN (*iter); - if (alignment > stack_alignment) - stack_alignment = alignment; - } - } - } - } - - if (require_stack_frame) + if (ix86_find_max_used_stack_alignment (stack_alignment, + stack_realign)) { /* Stack frame is required. If stack alignment needed is less than incoming stack boundary, don't realign stack. */ @@ -12851,6 +12867,16 @@ ix86_finalize_stack_frame_flags (void) recompute_frame_layout_p = true; } } + else if (crtl->max_used_stack_slot_alignment + > crtl->preferred_stack_boundary) + { + /* We don't need to realign stack. But we still need to keep + stack frame properly aligned to satisfy the largest alignment + of stack slots. */ + if (ix86_find_max_used_stack_alignment (stack_alignment, true)) + cfun->machine->max_used_stack_alignment + = stack_alignment / BITS_PER_UNIT; + } if (crtl->stack_realign_needed != stack_realign) recompute_frame_layout_p = true; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 6928f809d99e..933f261ea663 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2598,6 +2598,9 @@ struct GTY(()) machine_function { /* Nonzero if the function places outgoing arguments on stack. */ BOOL_BITFIELD outgoing_args_on_stack : 1; + /* The largest alignment, in bytes, of stack slot actually used. */ + unsigned int max_used_stack_alignment; + /* During prologue/epilogue generation, the current frame state. Otherwise, the frame state at the end of the prologue. */ struct machine_frame_state fs; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c6adde848668..a46a5ee6722e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-01-10 H.J. Lu + + PR target/83735 + * gcc.target/i386/pr83735.c: New test. + 2018-01-10 Christophe Lyon * lib/target-supports.exp (check_effective_target_branch_cost): diff --git a/gcc/testsuite/gcc.target/i386/pr83735.c b/gcc/testsuite/gcc.target/i386/pr83735.c new file mode 100644 index 000000000000..786c90a58394 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr83735.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -mavx" } */ + +#include +#include "cpuid.h" +#include "m256-check.h" +#include "avx-os-support.h" + +static void __attribute__((constructor)) +check_avx (void) +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) + exit (0); + + /* Run AVX test only if host has AVX support. */ + if (((ecx & (bit_AVX | bit_OSXSAVE)) == (bit_AVX | bit_OSXSAVE)) + && avx_os_support ()) + return; + + exit (0); +} + +struct S +{ + short b; + long c; + char d; + long e; + unsigned:8; +}; + +int f, h, k, l; +int g[10]; +volatile struct S j; +char m; + +int +main (void) +{ + int i; + struct S n; + for (i = 0; i < 6; i++) + { + for (f = 0; f < 10; f++) + g[f] = 4; + n = j; + h = m == 0 ? 1 : 5 % m; + if (l) + n.b = k; + } + return n.b; +}