mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-03 05:10:26 +08:00
arm: Fix MVE ICEs with vector moves and -mpure-code [PR97252]
This fixes around 500 ICEs in the testsuite which can be seen when testing with -march=armv8.1-m.main+mve -mfloat-abi=hard -mpure-code (leaving the testsuite free of ICEs in this configuration). All of the ICEs are in arm_print_operand (which is expecting a mem and gets another rtx, e.g. a const_vector) when running the output code for *mve_mov<mode> in alternative 4.

The issue is that MVE vector moves were relying on the arm_reorg pass to move constant vectors that we can't easily synthesize to the literal pool. This doesn't work for -mpure-code where the literal pool is disabled. LLVM puts these in .rodata: I've chosen to do the same here.

With this change, for -mpure-code, we no longer want to allow a constant on the RHS of a vector load in RA. To achieve this, I added a new constraint which matches constants only if the literal pool is available.

gcc/ChangeLog:

	PR target/97252
	* config/arm/arm-protos.h (neon_make_constant): Add generate argument to guard emitting insns, default to true.
	* config/arm/arm.c (arm_legitimate_constant_p_1): Reject CONST_VECTORs which neon_make_constant can't handle.
	(neon_vdup_constant): Add generate argument, avoid emitting insns if it's not set.
	(neon_make_constant): Plumb new generate argument through.
	* config/arm/constraints.md (Ui): New. Use it...
	* config/arm/mve.md (*mve_mov<mode>): ... here.
	* config/arm/vec-common.md (movv8hf): Use neon_make_constant to synthesize constants.
This commit is contained in:
parent
57d9ac6728
commit
150a829acc
@ -101,7 +101,7 @@ extern char *neon_output_shift_immediate (const char *, char, rtx *,
|
||||
machine_mode, int, bool);
|
||||
extern void neon_pairwise_reduce (rtx, rtx, machine_mode,
|
||||
rtx (*) (rtx, rtx, rtx));
|
||||
extern rtx neon_make_constant (rtx);
|
||||
extern rtx neon_make_constant (rtx, bool generate = true);
|
||||
extern tree arm_builtin_vectorized_function (unsigned int, tree, tree);
|
||||
extern void neon_expand_vector_init (rtx, rtx);
|
||||
extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
|
||||
|
@ -9461,6 +9461,9 @@ arm_tls_referenced_p (rtx x)
|
||||
static bool
|
||||
arm_legitimate_constant_p_1 (machine_mode, rtx x)
|
||||
{
|
||||
if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
|
||||
return false;
|
||||
|
||||
return flag_pic || !label_mentioned_p (x);
|
||||
}
|
||||
|
||||
@ -13025,12 +13028,14 @@ neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
|
||||
}
|
||||
}
|
||||
|
||||
/* If VALS is a vector constant that can be loaded into a register
|
||||
using VDUP, generate instructions to do so and return an RTX to
|
||||
assign to the register. Otherwise return NULL_RTX. */
|
||||
/* Return a non-NULL RTX iff VALS is a vector constant that can be
|
||||
loaded into a register using VDUP.
|
||||
|
||||
If this is the case, and GENERATE is set, we also generate
|
||||
instructions to do this and return an RTX to assign to the register. */
|
||||
|
||||
static rtx
|
||||
neon_vdup_constant (rtx vals)
|
||||
neon_vdup_constant (rtx vals, bool generate)
|
||||
{
|
||||
machine_mode mode = GET_MODE (vals);
|
||||
machine_mode inner_mode = GET_MODE_INNER (mode);
|
||||
@ -13046,6 +13051,9 @@ neon_vdup_constant (rtx vals)
|
||||
vdup.i16). */
|
||||
return NULL_RTX;
|
||||
|
||||
if (!generate)
|
||||
return x;
|
||||
|
||||
/* We can load this constant by using VDUP and a constant in a
|
||||
single ARM register. This will be cheaper than a vector
|
||||
load. */
|
||||
@ -13054,13 +13062,15 @@ neon_vdup_constant (rtx vals)
|
||||
return gen_vec_duplicate (mode, x);
|
||||
}
|
||||
|
||||
/* Generate code to load VALS, which is a PARALLEL containing only
|
||||
constants (for vec_init) or CONST_VECTOR, efficiently into a
|
||||
register. Returns an RTX to copy into the register, or NULL_RTX
|
||||
for a PARALLEL that cannot be converted into a CONST_VECTOR. */
|
||||
/* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
|
||||
constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
|
||||
into a register.
|
||||
|
||||
If this is the case, and GENERATE is set, we also generate code to do
|
||||
this and return an RTX to copy into the register. */
|
||||
|
||||
rtx
|
||||
neon_make_constant (rtx vals)
|
||||
neon_make_constant (rtx vals, bool generate)
|
||||
{
|
||||
machine_mode mode = GET_MODE (vals);
|
||||
rtx target;
|
||||
@ -13092,7 +13102,7 @@ neon_make_constant (rtx vals)
|
||||
&& simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
|
||||
/* Load using VMOV. On Cortex-A8 this takes one cycle. */
|
||||
return const_vec;
|
||||
else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
|
||||
else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
|
||||
/* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
|
||||
pipeline cycle; creating the constant takes one or two ARM
|
||||
pipeline cycles. */
|
||||
@ -13102,7 +13112,7 @@ neon_make_constant (rtx vals)
|
||||
(for either double or quad vectors). We cannot take advantage
|
||||
of single-cycle VLD1 because we need a PC-relative addressing
|
||||
mode. */
|
||||
return const_vec;
|
||||
return arm_disable_literal_pool ? NULL_RTX : const_vec;
|
||||
else
|
||||
/* A PARALLEL containing something not valid inside CONST_VECTOR.
|
||||
We cannot construct an initializer. */
|
||||
|
@ -506,6 +506,13 @@
|
||||
&& mve_vector_mem_operand (GET_MODE (op),
|
||||
XEXP (op, 0), true)")))
|
||||
|
||||
(define_constraint "Ui"
|
||||
"@internal
|
||||
Match a constant (as per the 'i' constraint) provided that we have the
|
||||
literal pool available. This is useful for load insns that would need
|
||||
to move such constants to the literal pool after RA."
|
||||
(match_test "!arm_disable_literal_pool && satisfies_constraint_i (op)"))
|
||||
|
||||
(define_memory_constraint "Uq"
|
||||
"@internal
|
||||
In ARM state an address valid in ldrsb instructions."
|
||||
|
@ -19,7 +19,7 @@
|
||||
|
||||
(define_insn "*mve_mov<mode>"
|
||||
[(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Ux,w")
|
||||
(match_operand:MVE_types 1 "general_operand" "w,r,w,Dn,Uxi,r,Dm,w,Ul"))]
|
||||
(match_operand:MVE_types 1 "general_operand" "w,r,w,Dn,UxUi,r,Dm,w,Ul"))]
|
||||
"TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
|
||||
{
|
||||
if (which_alternative == 3 || which_alternative == 6)
|
||||
|
@ -74,6 +74,11 @@
|
||||
{
|
||||
if (!REG_P (operands[0]))
|
||||
operands[1] = force_reg (E_V8HFmode, operands[1]);
|
||||
else if (TARGET_HAVE_MVE_FLOAT && CONSTANT_P (operands[1]))
|
||||
{
|
||||
operands[1] = neon_make_constant (operands[1]);
|
||||
gcc_assert (operands[1] != NULL_RTX);
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user