mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-23 12:10:57 +08:00
i386: Omit clobbers from vzeroupper until final [PR92190]
As mentioned in the PR, the CLOBBERs in vzeroupper are added there even for registers that aren't ever live in the function before and break the prologue/epilogue expansion with ms ABI (normal ABIs are fine, as they consider all [xyz]mm registers call clobbered, but the ms ABI considers xmm0-15 call used but the bits above low 128 ones call clobbered).

The following patch fixes it by not adding the clobbers during vzeroupper pass (before pro_and_epilogue), but adding them for -fipa-ra purposes only during the final output.

Perhaps we could add some CLOBBERs early (say for df_regs_ever_live_p regs that aren't live in the live_regs bitmap, or depending on the ABI either add all of them immediately, or for ms ABI add CLOBBERs for xmm0-xmm5 if they don't have a SET) and add the rest later. And the addition could be perhaps done at other spots, e.g. in an epilogue_completed guarded splitter.

2020-02-05 Jakub Jelinek <jakub@redhat.com>

PR target/92190
* config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper): Only include sets and not clobbers in the vzeroupper pattern.
* config/i386/sse.md (*avx_vzeroupper): Require in insn condition that the parallel has 17 (64-bit) or 9 (32-bit) elts.
(*avx_vzeroupper_1): New define_insn_and_split.
* gcc.target/i386/pr92190.c: New test.
This commit is contained in:
parent
17a2e8c091
commit
b7b3378f91
@ -1,5 +1,12 @@
|
||||
2020-02-05 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/92190
|
||||
* config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper): Only
|
||||
include sets and not clobbers in the vzeroupper pattern.
|
||||
* config/i386/sse.md (*avx_vzeroupper): Require in insn condition that
|
||||
the parallel has 17 (64-bit) or 9 (32-bit) elts.
|
||||
(*avx_vzeroupper_1): New define_insn_and_split.
|
||||
|
||||
PR target/92190
|
||||
* recog.c (pass_split_after_reload::gate): For STACK_REGS targets,
|
||||
don't run when !optimize.
|
||||
|
@ -1764,29 +1764,32 @@ convert_scalars_to_vector (bool timode_p)
|
||||
|
||||
(set (reg:V2DF R) (reg:V2DF R))
|
||||
|
||||
which preserves the low 128 bits but clobbers the upper bits.
|
||||
For a dead register we just use:
|
||||
|
||||
(clobber (reg:V2DF R))
|
||||
|
||||
which invalidates any previous contents of R and stops R from becoming
|
||||
live across the vzeroupper in future. */
|
||||
which preserves the low 128 bits but clobbers the upper bits. */
|
||||
|
||||
static void
|
||||
ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
|
||||
{
|
||||
rtx pattern = PATTERN (insn);
|
||||
unsigned int nregs = TARGET_64BIT ? 16 : 8;
|
||||
rtvec vec = rtvec_alloc (nregs + 1);
|
||||
RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
|
||||
unsigned int npats = nregs;
|
||||
for (unsigned int i = 0; i < nregs; ++i)
|
||||
{
|
||||
unsigned int regno = GET_SSE_REGNO (i);
|
||||
if (!bitmap_bit_p (live_regs, regno))
|
||||
npats--;
|
||||
}
|
||||
if (npats == 0)
|
||||
return;
|
||||
rtvec vec = rtvec_alloc (npats + 1);
|
||||
RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
|
||||
for (unsigned int i = 0, j = 0; i < nregs; ++i)
|
||||
{
|
||||
unsigned int regno = GET_SSE_REGNO (i);
|
||||
if (!bitmap_bit_p (live_regs, regno))
|
||||
continue;
|
||||
rtx reg = gen_rtx_REG (V2DImode, regno);
|
||||
if (bitmap_bit_p (live_regs, regno))
|
||||
RTVEC_ELT (vec, i + 1) = gen_rtx_SET (reg, reg);
|
||||
else
|
||||
RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
|
||||
++j;
|
||||
RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);
|
||||
}
|
||||
XVEC (pattern, 0) = vec;
|
||||
df_insn_rescan (insn);
|
||||
|
@ -19818,7 +19818,7 @@
|
||||
(define_insn "*avx_vzeroupper"
|
||||
[(match_parallel 0 "vzeroupper_pattern"
|
||||
[(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
|
||||
"TARGET_AVX"
|
||||
"TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"
|
||||
"vzeroupper"
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "modrm" "0")
|
||||
@ -19827,6 +19827,44 @@
|
||||
(set_attr "btver2_decode" "vector")
|
||||
(set_attr "mode" "OI")])
|
||||
|
||||
(define_insn_and_split "*avx_vzeroupper_1"
|
||||
[(match_parallel 0 "vzeroupper_pattern"
|
||||
[(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
|
||||
"TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
|
||||
"#"
|
||||
"&& epilogue_completed"
|
||||
[(match_dup 0)]
|
||||
{
|
||||
/* This pattern matches a vzeroupper parallel that does NOT yet have
   one element per SSE register (the insn condition above checks that
   the length differs from nregs + 1).  Once epilogue_completed is
   set, the code below rewrites operands[0] into a parallel of exactly
   nregs + 1 elements, i.e. the length the insn condition above
   excludes.  */
/* For IPA-RA purposes, make it clear the instruction clobbers
|
||||
even XMM registers not mentioned explicitly in the pattern. */
|
||||
unsigned int nregs = TARGET_64BIT ? 16 : 8;
|
||||
/* Number of elements in the incoming parallel, including element 0.  */
unsigned int npats = XVECLEN (operands[0], 0);
|
||||
/* The replacement vector: slot 0 plus one slot per SSE register.  */
rtvec vec = rtvec_alloc (nregs + 1);
|
||||
/* Slot 0 is carried over unchanged from the incoming parallel.  */
RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
|
||||
/* Walk the SSE registers in index order; j is the cursor into the
   incoming parallel and starts just past element 0.  */
for (unsigned int i = 0, j = 1; i < nregs; ++i)
|
||||
{
|
||||
unsigned int regno = GET_SSE_REGNO (i);
|
||||
/* Reuse the next incoming element when its destination is this
   register.  NOTE(review): only the element at j is inspected, so
   this presumably relies on the incoming elements being ordered by
   ascending register index - confirm against the code that builds
   the pattern.  */
if (j < npats
|
||||
&& REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
|
||||
{
|
||||
RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
|
||||
j++;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* No incoming element for this register: add a V2DImode CLOBBER
   of it in the corresponding slot.  */
rtx reg = gen_rtx_REG (V2DImode, regno);
|
||||
RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
|
||||
}
|
||||
}
|
||||
/* Replace the matched parallel with the fully populated one.  */
operands[0] = gen_rtx_PARALLEL (VOIDmode, vec);
|
||||
}
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "modrm" "0")
|
||||
(set_attr "memory" "none")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "btver2_decode" "vector")
|
||||
(set_attr "mode" "OI")])
|
||||
|
||||
(define_mode_attr pbroadcast_evex_isa
|
||||
[(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
|
||||
(V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
|
||||
|
@ -1,3 +1,8 @@
|
||||
2020-02-05 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/92190
|
||||
* gcc.target/i386/pr92190.c: New test.
|
||||
|
||||
2020-02-05 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR testsuite/92177
|
||||
|
19
gcc/testsuite/gcc.target/i386/pr92190.c
Normal file
19
gcc/testsuite/gcc.target/i386/pr92190.c
Normal file
@ -0,0 +1,19 @@
|
||||
/* PR target/92190 */
|
||||
/* { dg-do compile { target { *-*-linux* && lp64 } } } */
|
||||
/* { dg-options "-mabi=ms -O2 -mavx512f" } */
|
||||
|
||||
/* VC is a 16-byte vector of char.  */
typedef char VC __attribute__((vector_size (16)));
|
||||
/* VI is a vector of int occupying 16 * sizeof (int) bytes; sizeof 0 is
   the size of an int constant.  */
typedef int VI __attribute__((vector_size (16 * sizeof 0)));
|
||||
VC a;
|
||||
VI b;
|
||||
/* bar and baz are only declared here, not defined in this file; each
   takes one of the vector types by value.  */
void bar (VI);
|
||||
void baz (VC);
|
||||
|
||||
/* Copy both globals into locals, then pass n to bar and k to baz.
   k is read from a before bar is called and used only after bar
   returns, so its value has to be kept across the call to bar.  */
void
|
||||
foo (void)
|
||||
{
|
||||
VC k = a;
|
||||
VI n = b;
|
||||
bar (n);
|
||||
baz (k);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user