AArch64: Enable TARGET_CONST_ANCHOR

Enable TARGET_CONST_ANCHOR to allow complex constants to be created via
immediate add/sub.  Use a 24-bit range as that enables a 3 or 4-instruction
immediate to be replaced by 2 add/sub instructions.  Fix the costing of
add/sub to support 24-bit and 12-bit shifted immediates.
The generated code for the testcase is now the same as or better than LLVM's.
It also results in a small codesize reduction on SPEC.

gcc/
	* config/aarch64/aarch64.cc (aarch64_rtx_costs): Add correct costs
	for 24-bit and 12-bit shifted immediate add/sub.
	(TARGET_CONST_ANCHOR): Define.
	* config/aarch64/predicates.md (aarch64_pluslong_immediate):
	Fix range check.

gcc/testsuite/
	* gcc.target/aarch64/movk_3.c: New test.
This commit is contained in:
Wilco Dijkstra 2022-12-12 15:44:03 +00:00
parent 4d9db4bdd4
commit 2d7c73ee5e
3 changed files with 70 additions and 1 deletions

View File

@ -14237,6 +14237,16 @@ cost_plus:
return true;
}
if (aarch64_pluslong_immediate (op1, mode))
{
/* 24-bit add in 2 instructions or 12-bit shifted add. */
if ((INTVAL (op1) & 0xfff) != 0)
*cost += COSTS_N_INSNS (1);
*cost += rtx_cost (op0, mode, PLUS, 0, speed);
return true;
}
*cost += rtx_cost (op1, mode, PLUS, 1, speed);
/* Look for ADD (extended register). */
@ -28091,6 +28101,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_HAVE_SHADOW_CALL_STACK
#define TARGET_HAVE_SHADOW_CALL_STACK true
/* Constant anchor size.  0x1000000 is 2^24, i.e. a 24-bit range: an offset
   of that size can be applied with at most two immediate add/sub
   instructions (or one when only a 12-bit or 12-bit shifted immediate is
   needed).  */
#undef TARGET_CONST_ANCHOR
#define TARGET_CONST_ANCHOR 0x1000000
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-aarch64.h"

View File

@ -146,7 +146,7 @@
(define_predicate "aarch64_pluslong_immediate"
(and (match_code "const_int")
(match_test "(INTVAL (op) < 0xffffff && INTVAL (op) > -0xffffff)")))
(match_test "IN_RANGE (INTVAL (op), -0xffffff, 0xffffff)")))
(define_predicate "aarch64_sminmax_immediate"
(and (match_code "const_int")

View File

@ -0,0 +1,56 @@
/* { dg-do compile } */
/* { dg-options "-O2 --save-temps" } */
/* f16: store two constants that each fit in 16 bits (0x1234 and 0x1235).
   Expected code: 2 MOV, i.e. one MOV per constant and no ADD.  */
void f16 (long *p)
{
p[0] = 0x1234;
p[2] = 0x1235;
}
/* f32_1: store 0x12345678 and the same value plus 0xfff.
   Expected code: MOV, MOVK and ADD.  The first constant is built with
   MOV + MOVK and the second is expected to be derived from it with a single
   ADD, since the delta 0xfff is a 12-bit value.  */
void f32_1 (long *p)
{
p[0] = 0x12345678;
p[2] = 0x12345678 + 0xfff;
}
/* f32_2: store 0x12345678 and the same value plus 0x555555.
   Expected code: 2 MOV, 2 MOVK and no ADD, i.e. both constants are built
   independently.  The delta has non-zero bits in both its low and high
   12-bit halves, so it would presumably need two ADDs, which is no cheaper
   than MOV + MOVK for the second constant.  */
void f32_2 (long *p)
{
p[0] = 0x12345678;
p[2] = 0x12345678 + 0x555555;
}
/* f32_3: store 0x12345678 and the same value plus 0x999000.
   Expected code: MOV, MOVK and ADD.  The low 12 bits of the delta are zero,
   so a single ADD with a 12-bit shifted immediate is expected to produce
   the second constant from the first.  */
void f32_3 (long *p)
{
p[0] = 0x12345678;
p[2] = 0x12345678 + 0x999000;
}
/* f48_1: store the 48-bit constant 0x123456789abc and the same value plus
   0xfff.
   Expected code: MOV, 2 MOVK and ADD.  The first constant is built with
   MOV + 2 MOVK and the second is expected to be derived from it with a
   single ADD, since the delta 0xfff is a 12-bit value.  */
void f48_1 (long *p)
{
p[0] = 0x123456789abc;
p[2] = 0x123456789abc + 0xfff;
}
/* f48_2: store the 48-bit constant 0x123456789abc and the same value plus
   0x666666.
   Expected code: MOV, 2 MOVK and 2 ADD.  The delta fits in 24 bits but has
   non-zero bits in both 12-bit halves, so the second constant is expected
   to be derived from the first with two ADDs instead of being rebuilt with
   another MOV + 2 MOVK.  */
void f48_2 (long *p)
{
p[0] = 0x123456789abc;
p[2] = 0x123456789abc + 0x666666;
}
/* f48_3: store the 48-bit constant 0x123456789abc and the same value plus
   0x1666666.
   Expected code: 2 MOV, 4 MOVK and no ADD, i.e. both constants are built
   independently.  The delta 0x1666666 does not fit in 24 bits, so it is
   presumably too large to be applied with immediate ADDs.  */
void f48_3 (long *p)
{
p[0] = 0x123456789abc;
p[2] = 0x123456789abc + 0x1666666;
}
/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, \[0-9\]+" 10 } } */
/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x\[0-9a-f\]+" 12 } } */
/* { dg-final { scan-assembler-times "add\tx\[0-9\]+, x\[0-9\]+, \[0-9\]+" 5 } } */