nvptx: support bar.red instruction

This patch adds support for the PTX 'bar.red' (i.e. "barrier reduction")
instruction, in the form of nvptx-specific __builtin_nvptx_bar_red_[and/or/popc]
built-in functions.

gcc/ChangeLog:

	* config/nvptx/nvptx.cc (nvptx_print_operand): Add 'p' case, adjust
	comments.
	(enum nvptx_builtins): Add NVPTX_BUILTIN_BAR_RED_AND,
	NVPTX_BUILTIN_BAR_RED_OR, and NVPTX_BUILTIN_BAR_RED_POPC.
	(nvptx_expand_bar_red): New function.
	(nvptx_init_builtins):
	Add DEFs of __builtin_nvptx_bar_red_[and/or/popc].
	(nvptx_expand_builtin): Use nvptx_expand_bar_red to expand
	NVPTX_BUILTIN_BAR_RED_[AND/OR/POPC] cases.

	* config/nvptx/nvptx.md (define_c_enum "unspecv"): Add
	UNSPECV_BARRED_AND, UNSPECV_BARRED_OR, and UNSPECV_BARRED_POPC.
	(BARRED): New int iterator.
	(barred_op,barred_mode,barred_ptxtype): New int attrs.
	(nvptx_barred_<barred_op>): New define_insn.
This commit is contained in:
Chung-Lin Tang 2022-12-21 05:26:06 -08:00
parent f661b3d11e
commit 623daaf8a2
2 changed files with 131 additions and 0 deletions

View File

@ -2883,6 +2883,7 @@ nvptx_mem_maybe_shared_p (const_rtx x)
t -- print a type opcode suffix, promoting QImode to 32 bits
T -- print a type size in bits
u -- print a type opcode suffix without promotions.
p -- print a '!' for constant 0.
x -- print a destination operand that may also be a bit bucket. */
static void
@ -3016,6 +3017,11 @@ nvptx_print_operand (FILE *file, rtx x, int code)
fprintf (file, "@!");
goto common;
case 'p':
if (INTVAL (x) == 0)
fprintf (file, "!");
break;
case 'c':
mode = GET_MODE (XEXP (x, 0));
switch (x_code)
@ -6155,9 +6161,90 @@ enum nvptx_builtins
NVPTX_BUILTIN_CMP_SWAPLL,
NVPTX_BUILTIN_MEMBAR_GL,
NVPTX_BUILTIN_MEMBAR_CTA,
NVPTX_BUILTIN_BAR_RED_AND,
NVPTX_BUILTIN_BAR_RED_OR,
NVPTX_BUILTIN_BAR_RED_POPC,
NVPTX_BUILTIN_MAX
};
/* Expander for 'bar.red' instruction builtins. */
static rtx
nvptx_expand_bar_red (tree exp, rtx target,
machine_mode ARG_UNUSED (m), int ARG_UNUSED (ignore))
{
int code = DECL_MD_FUNCTION_CODE (TREE_OPERAND (CALL_EXPR_FN (exp), 0));
machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
if (!target)
target = gen_reg_rtx (mode);
rtx pred, dst;
rtx bar = expand_expr (CALL_EXPR_ARG (exp, 0),
NULL_RTX, SImode, EXPAND_NORMAL);
rtx nthr = expand_expr (CALL_EXPR_ARG (exp, 1),
NULL_RTX, SImode, EXPAND_NORMAL);
rtx cpl = expand_expr (CALL_EXPR_ARG (exp, 2),
NULL_RTX, SImode, EXPAND_NORMAL);
rtx redop = expand_expr (CALL_EXPR_ARG (exp, 3),
NULL_RTX, SImode, EXPAND_NORMAL);
if (CONST_INT_P (bar))
{
if (INTVAL (bar) < 0 || INTVAL (bar) > 15)
{
error_at (EXPR_LOCATION (exp),
"barrier value must be within [0,15]");
return const0_rtx;
}
}
else if (!REG_P (bar))
bar = copy_to_mode_reg (SImode, bar);
if (!CONST_INT_P (nthr) && !REG_P (nthr))
nthr = copy_to_mode_reg (SImode, nthr);
if (!CONST_INT_P (cpl))
{
error_at (EXPR_LOCATION (exp),
"complement argument must be constant");
return const0_rtx;
}
pred = gen_reg_rtx (BImode);
if (!REG_P (redop))
redop = copy_to_mode_reg (SImode, redop);
emit_insn (gen_rtx_SET (pred, gen_rtx_NE (BImode, redop, GEN_INT (0))));
redop = pred;
rtx pat;
switch (code)
{
case NVPTX_BUILTIN_BAR_RED_AND:
dst = gen_reg_rtx (BImode);
pat = gen_nvptx_barred_and (dst, bar, nthr, cpl, redop);
break;
case NVPTX_BUILTIN_BAR_RED_OR:
dst = gen_reg_rtx (BImode);
pat = gen_nvptx_barred_or (dst, bar, nthr, cpl, redop);
break;
case NVPTX_BUILTIN_BAR_RED_POPC:
dst = gen_reg_rtx (SImode);
pat = gen_nvptx_barred_popc (dst, bar, nthr, cpl, redop);
break;
default:
gcc_unreachable ();
}
emit_insn (pat);
if (GET_MODE (dst) == BImode)
{
rtx tmp = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (tmp, gen_rtx_NE (mode, dst, GEN_INT (0))));
dst = tmp;
}
emit_move_insn (target, dst);
return target;
}
static GTY(()) tree nvptx_builtin_decls[NVPTX_BUILTIN_MAX];
/* Return the NVPTX builtin for CODE. */
@ -6198,6 +6285,13 @@ nvptx_init_builtins (void)
DEF (MEMBAR_GL, "membar_gl", (VOID, VOID, NULL_TREE));
DEF (MEMBAR_CTA, "membar_cta", (VOID, VOID, NULL_TREE));
DEF (BAR_RED_AND, "bar_red_and",
(UINT, UINT, UINT, UINT, UINT, NULL_TREE));
DEF (BAR_RED_OR, "bar_red_or",
(UINT, UINT, UINT, UINT, UINT, NULL_TREE));
DEF (BAR_RED_POPC, "bar_red_popc",
(UINT, UINT, UINT, UINT, UINT, NULL_TREE));
#undef DEF
#undef ST
#undef UINT
@ -6240,6 +6334,11 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget),
emit_insn (gen_nvptx_membar_cta ());
return NULL_RTX;
case NVPTX_BUILTIN_BAR_RED_AND:
case NVPTX_BUILTIN_BAR_RED_OR:
case NVPTX_BUILTIN_BAR_RED_POPC:
return nvptx_expand_bar_red (exp, target, mode, ignore);
default: gcc_unreachable ();
}
}

View File

@ -58,6 +58,9 @@
UNSPECV_CAS_LOCAL
UNSPECV_XCHG
UNSPECV_ST
UNSPECV_BARRED_AND
UNSPECV_BARRED_OR
UNSPECV_BARRED_POPC
UNSPECV_BARSYNC
UNSPECV_WARPSYNC
UNSPECV_UNIFORM_WARP_CHECK
@ -2274,6 +2277,35 @@
"TARGET_PTX_6_0"
"%.\\tbar.warp.sync\\t0xffffffff;")
(define_int_iterator BARRED
[UNSPECV_BARRED_AND
UNSPECV_BARRED_OR
UNSPECV_BARRED_POPC])
(define_int_attr barred_op
[(UNSPECV_BARRED_AND "and")
(UNSPECV_BARRED_OR "or")
(UNSPECV_BARRED_POPC "popc")])
(define_int_attr barred_mode
[(UNSPECV_BARRED_AND "BI")
(UNSPECV_BARRED_OR "BI")
(UNSPECV_BARRED_POPC "SI")])
(define_int_attr barred_ptxtype
[(UNSPECV_BARRED_AND "pred")
(UNSPECV_BARRED_OR "pred")
(UNSPECV_BARRED_POPC "u32")])
(define_insn "nvptx_barred_<barred_op>"
[(set (match_operand:<barred_mode> 0 "nvptx_register_operand" "=R")
(unspec_volatile
[(match_operand:SI 1 "nvptx_nonmemory_operand" "Ri")
(match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
(match_operand:SI 3 "const_int_operand" "i")
(match_operand:BI 4 "nvptx_register_operand" "R")]
BARRED))]
""
"\\tbar.red.<barred_op>.<barred_ptxtype> \\t%0, %1, %2, %p3%4;";"
[(set_attr "predicable" "no")])
(define_insn "nvptx_uniform_warp_check"
[(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)]
""