[amdgcn] Restrict registers available to non-kernel functions

Restrict the number of SGPRs and VGPRs available to non-kernel functions
to improve compute-unit occupancy with multiple threads.

2019-11-15  Kwok Cheung Yeung  <kcy@codesourcery.com>

	gcc/
	* config/gcn/gcn.c (default_requested_args): New.
	(gcn_parse_amdgpu_hsa_kernel_attribute): Initialize requested args
	set with default_requested_args.
	(gcn_conditional_register_usage): Limit register usage of non-kernel
	functions.  Reassign fixed registers if a non-standard set of args is
	requested.
	* config/gcn/gcn.h (FIXED_REGISTERS): Fix registers according to ABI.

From-SVN: r278301
This commit is contained in:
Kwok Cheung Yeung 2019-11-15 15:36:34 +00:00 committed by Kwok Cheung Yeung
parent 1c3c3f4550
commit 342f946476
3 changed files with 50 additions and 31 deletions

View File

@ -1,3 +1,13 @@
2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
* config/gcn/gcn.c (default_requested_args): New.
(gcn_parse_amdgpu_hsa_kernel_attribute): Initialize requested args
set with default_requested_args.
(gcn_conditional_register_usage): Limit register usage of non-kernel
functions. Reassign fixed registers if a non-standard set of args is
requested.
* config/gcn/gcn.h (FIXED_REGISTERS): Fix registers according to ABI.
2019-11-15 Feng Xue <fxue@os.amperecomputing.com>
PR ipa/92528

View File

@ -191,6 +191,17 @@ static const struct gcn_kernel_arg_type
{"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
};
/* Bitmask with one bit set per kernel argument kind (bit N corresponds to
   the *_ARG index N) for the default set of requested arguments.
   NOTE(review): each `1 << N' is evaluated in type int before the result
   is widened to long; confirm that every *_ARG index used here is small
   enough for an int shift.  */
static const long default_requested_args
= (1 << PRIVATE_SEGMENT_BUFFER_ARG)
| (1 << DISPATCH_PTR_ARG)
| (1 << QUEUE_PTR_ARG)
| (1 << KERNARG_SEGMENT_PTR_ARG)
| (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG)
| (1 << WORKGROUP_ID_X_ARG)
| (1 << WORK_ITEM_ID_X_ARG)
| (1 << WORK_ITEM_ID_Y_ARG)
| (1 << WORK_ITEM_ID_Z_ARG);
/* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
This function also sets the default values for some arguments.
@ -201,10 +212,7 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
tree list)
{
bool err = false;
args->requested = ((1 << PRIVATE_SEGMENT_BUFFER_ARG)
| (1 << QUEUE_PTR_ARG)
| (1 << KERNARG_SEGMENT_PTR_ARG)
| (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG));
args->requested = default_requested_args;
args->nargs = 0;
for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
@ -242,8 +250,6 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
args->requested |= (1 << a);
args->order[args->nargs++] = a;
}
args->requested |= (1 << WORKGROUP_ID_X_ARG);
args->requested |= (1 << WORK_ITEM_ID_Z_ARG);
/* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
WORK_ITEM_ID_Y_ARG. Similarly, requesting WORK_ITEM_ID_Y_ARG implies
@ -253,10 +259,6 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
args->requested |= (1 << WORK_ITEM_ID_X_ARG);
/* Always enable this so that kernargs is in a predictable place for
gomp_print, etc. */
args->requested |= (1 << DISPATCH_PTR_ARG);
int sgpr_regno = FIRST_SGPR_REG;
args->nsgprs = 0;
for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
@ -2045,27 +2047,34 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
static void
gcn_conditional_register_usage (void)
{
int i;
if (!cfun || !cfun->machine)
return;
/* FIXME: Do we need to reset fixed_regs? */
/* Limit ourselves to 1/16 the register file for maximum sized workgroups.
There are enough SGPRs not to limit those.
TODO: Adjust this more dynamically. */
for (i = FIRST_VGPR_REG + 64; i <= LAST_VGPR_REG; i++)
fixed_regs[i] = 1, call_used_regs[i] = 1;
if (!cfun || !cfun->machine || cfun->machine->normal_function)
if (cfun->machine->normal_function)
{
/* Normal functions can't know what kernel argument registers are
live, so just fix the bottom 16 SGPRs, and bottom 3 VGPRs. */
for (i = 0; i < 16; i++)
fixed_regs[FIRST_SGPR_REG + i] = 1;
for (i = 0; i < 3; i++)
fixed_regs[FIRST_VGPR_REG + i] = 1;
/* Restrict the set of SGPRs and VGPRs used by non-kernel functions. */
for (int i = SGPR_REGNO (62); i <= LAST_SGPR_REG; i++)
fixed_regs[i] = 1, call_used_regs[i] = 1;
for (int i = VGPR_REGNO (24); i <= LAST_VGPR_REG; i++)
fixed_regs[i] = 1, call_used_regs[i] = 1;
return;
}
/* If the set of requested args is the default set, nothing more needs to
be done. */
if (cfun->machine->args.requested == default_requested_args)
return;
/* Requesting a set of args different from the default violates the ABI. */
if (!leaf_function_p ())
warning (0, "A non-default set of initial values has been requested, "
"which violates the ABI!");
for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++)
fixed_regs[i] = 0;
/* Fix the runtime argument register containing values that may be
needed later. DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
needed after the prologue so there's no need to fix them. */
@ -2073,10 +2082,10 @@ gcn_conditional_register_usage (void)
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
{
/* The upper 32-bits of the 64-bit descriptor are not used, so allow
the containing registers to be used for other purposes. */
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1;
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 2] = 1;
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 3] = 1;
}
if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
{

View File

@ -160,9 +160,9 @@
#define FIXED_REGISTERS { \
/* Scalars. */ \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
1, 1, 0, 0, 1, 1, 0, 0, 1, 1, \
/* fp sp lr. */ \
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, \
1, 1, 0, 0, 1, 1, 1, 1, 0, 0, \
/* exec_save, cc_save */ \
1, 1, 1, 1, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
@ -180,7 +180,7 @@
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
/* VGPRs */ \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \