Make MMA builtins use opaque modes

This patch changes the powerpc MMA builtins to use the new opaque
mode class, using modes OO (32 bytes) and XO (64 bytes) instead of
POI/PXI. Using opaque modes prevents the optimizers from trying to
do anything with vector pairs/quads, which was the problem we were
seeing with the partial integer modes.

gcc/
	* config/rs6000/mma.md (unspec): Add assemble/extract UNSPECs.
	(movoi): Change to movoo.
	(*movpoi): Change to *movoo.
	(movxi): Change to movxo.
	(*movpxi): Change to *movxo.
	(mma_assemble_pair): Change to OO mode.
	(*mma_assemble_pair): New define_insn_and_split.
	(mma_disassemble_pair): New define_expand.
	(*mma_disassemble_pair): New define_insn_and_split.
	(mma_assemble_acc): Change to XO mode.
	(*mma_assemble_acc): Change to XO mode.
	(mma_disassemble_acc): New define_expand.
	(*mma_disassemble_acc): New define_insn_and_split.
	(mma_<acc>): Change to XO mode.
	(mma_<vv>): Change to XO mode.
	(mma_<avv>): Change to XO mode.
	(mma_<pv>): Change to OO mode.
	(mma_<apv>): Change to XO/OO mode.
	(mma_<vvi4i4i8>): Change to XO mode.
	(mma_<avvi4i4i8>): Change to XO mode.
	(mma_<vvi4i4i2>): Change to XO mode.
	(mma_<avvi4i4i2>): Change to XO mode.
	(mma_<vvi4i4>): Change to XO mode.
	(mma_<avvi4i4>): Change to XO mode.
	(mma_<pvi4i2>): Change to XO/OO mode.
	(mma_<apvi4i2>): Change to XO/OO mode.
	(mma_<vvi4i4i4>): Change to XO mode.
	(mma_<avvi4i4i4>): Change to XO mode.
	* config/rs6000/predicates.md (input_operand): Allow opaque.
	(mma_disassemble_output_operand): New predicate.
	* config/rs6000/rs6000-builtin.def (BU_MMA_2): Rename from
	BU_MMA_V2 and add an "_internal" built-in for each entry.
	(DISASSEMBLE_ACC, DISASSEMBLE_PAIR): Use BU_MMA_2 and the new
	disassemble insns.
	* config/rs6000/rs6000-call.c (rs6000_return_in_memory):
	Disallow __vector_pair/__vector_quad as return types.
	(rs6000_promote_function_mode): Remove function return type
	check because we can't test it here any more.
	(rs6000_function_arg): Do not allow __vector_pair/__vector_quad
	as function arguments.
	(rs6000_gimple_fold_mma_builtin): Handle mma_disassemble_*
	builtins.
	(rs6000_init_builtins): Create types for XO/OO modes.
	* config/rs6000/rs6000-modes.def: Delete OI, XI, POI, and PXI
	modes; create OO and XO opaque modes.
	* config/rs6000/rs6000-string.c (expand_block_move):
	Update to OO mode.
	* config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok_uncached):
	Update for XO/OO modes.
	(rs6000_rtx_costs): Make UNSPEC_MMA_XXSETACCZ cost 0.
	(rs6000_modes_tieable_p): Update for XO/OO modes.
	(rs6000_debug_reg_global): Update for XO/OO modes.
	(rs6000_setup_reg_addr_masks): Update for XO/OO modes.
	(rs6000_init_hard_regno_mode_ok): Update for XO/OO modes.
	(reg_offset_addressing_ok_p): Update for XO/OO modes.
	(rs6000_emit_move): Update for XO/OO modes.
	(rs6000_preferred_reload_class): Update for XO/OO modes.
	(rs6000_split_multireg_move): Update for XO/OO modes.
	(rs6000_mangle_type): Update for opaque types.
	(rs6000_invalid_conversion): Update for XO/OO modes.
	* config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P):
	Update for XO/OO modes.
	* config/rs6000/rs6000.md (RELOAD): Update for XO/OO modes.
gcc/testsuite/
	* gcc.target/powerpc/mma-double-test.c (main): Call abort for failure.
	* gcc.target/powerpc/mma-single-test.c (main): Call abort for failure.
	* gcc.target/powerpc/pr96506.c: Rename to pr96506-1.c.
	* gcc.target/powerpc/pr96506-2.c: New test.
Aaron Sawdey 2020-11-17 11:38:20 -05:00
parent 1e2d8575ac
commit f8f8909af1
13 changed files with 517 additions and 372 deletions

gcc/config/rs6000/mma.md

@@ -19,24 +19,18 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; The MMA patterns use the multi-register PXImode and POImode partial
;; integer modes to implement the target specific __vector_quad and
;; __vector_pair types that the MMA built-in functions reference.
;; To use these modes, we must define XImode and OImode move patterns
;; so the independent parts of the compiler can use our large partial
;; integer modes. However, if we enable the XImode and OImode move
;; patterns, then the compiler will attempt to use them and this can
;; cause byte swapping issues on little-endian systems. We don't need
;; the XImode and OImode move patterns for actual code generation,
;; therefore, we define the XImode and OImode move patterns, but we
;; disable their use with a "false" condition flag.
;; The MMA patterns use the multi-register XOmode and OOmode opaque
;; modes to implement the target specific __vector_quad and
;; __vector_pair types that the MMA built-in functions reference. We
;; use OPAQUE_MODE to prevent anything from trying to open them up.
(define_constants [(MAX_MMA_OPERANDS 7)])
;; Constants for creating unspecs
(define_c_enum "unspec"
[UNSPEC_MMA_ASSEMBLE_ACC
[UNSPEC_MMA_ASSEMBLE
UNSPEC_MMA_EXTRACT
UNSPEC_MMA_PMXVBF16GER2
UNSPEC_MMA_PMXVBF16GER2NN
UNSPEC_MMA_PMXVBF16GER2NP
@@ -97,6 +91,7 @@
UNSPEC_MMA_XVI8GER4SPP
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
UNSPEC_MMA_XXSETACCZ
])
;; MMA instructions with 1 accumulator argument
@@ -265,31 +260,22 @@
(UNSPEC_MMA_PMXVI8GER4SPP "pmxvi8ger4spp")])
;; Define a disabled OImode move pattern, so we can use POImode.
(define_expand "movoi"
[(set (match_operand:OI 0 "nonimmediate_operand")
(match_operand:OI 1 "input_operand"))]
"0"
{
gcc_unreachable ();
})
;; Vector pair support. POImode can only live in VSRs.
(define_expand "movpoi"
[(set (match_operand:POI 0 "nonimmediate_operand")
(match_operand:POI 1 "input_operand"))]
;; Vector pair support. OOmode can only live in VSRs.
(define_expand "movoo"
[(set (match_operand:OO 0 "nonimmediate_operand")
(match_operand:OO 1 "input_operand"))]
"TARGET_MMA"
{
rs6000_emit_move (operands[0], operands[1], POImode);
rs6000_emit_move (operands[0], operands[1], OOmode);
DONE;
})
(define_insn_and_split "*movpoi"
[(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
(match_operand:POI 1 "input_operand" "m,wa,wa"))]
(define_insn_and_split "*movoo"
[(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
(match_operand:OO 1 "input_operand" "m,wa,wa"))]
"TARGET_MMA
&& (gpc_reg_operand (operands[0], POImode)
|| gpc_reg_operand (operands[1], POImode))"
&& (gpc_reg_operand (operands[0], OOmode)
|| gpc_reg_operand (operands[1], OOmode))"
"@
lxvp%X1 %x0,%1
stxvp%X0 %x1,%0
@@ -305,287 +291,370 @@
(set_attr "length" "*,*,8")])
;; Define a disabled XImode move pattern, so we can use PXImode.
(define_expand "movxi"
[(set (match_operand:XI 0 "nonimmediate_operand")
(match_operand:XI 1 "input_operand"))]
"0"
{
gcc_unreachable ();
})
;; Vector quad support. PXImode can only live in FPRs.
(define_expand "movpxi"
[(set (match_operand:PXI 0 "nonimmediate_operand")
(match_operand:PXI 1 "input_operand"))]
;; Vector quad support. XOmode can only live in FPRs.
(define_expand "movxo"
[(set (match_operand:XO 0 "nonimmediate_operand")
(match_operand:XO 1 "input_operand"))]
"TARGET_MMA"
{
rs6000_emit_move (operands[0], operands[1], PXImode);
rs6000_emit_move (operands[0], operands[1], XOmode);
DONE;
})
(define_insn_and_split "*movpxi"
[(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d,d")
(match_operand:PXI 1 "input_operand" "m,d,d,O"))]
(define_insn_and_split "*movxo"
[(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d")
(match_operand:XO 1 "input_operand" "m,d,d"))]
"TARGET_MMA
&& (gpc_reg_operand (operands[0], PXImode)
|| gpc_reg_operand (operands[1], PXImode))"
&& (gpc_reg_operand (operands[0], XOmode)
|| gpc_reg_operand (operands[1], XOmode))"
"@
#
#
#
xxsetaccz %A0"
"&& reload_completed
&& !(fpr_reg_operand (operands[0], PXImode) && operands[1] == const0_rtx)"
#"
"&& reload_completed"
[(const_int 0)]
{
rs6000_split_multireg_move (operands[0], operands[1]);
DONE;
}
[(set_attr "type" "vecload,vecstore,veclogical,mma")
(set_attr "length" "8,8,16,*")
(set_attr "max_prefixed_insns" "2,2,*,*")])
[(set_attr "type" "vecload,vecstore,veclogical")
(set_attr "length" "8,8,16")
(set_attr "max_prefixed_insns" "2,2,*")])
(define_expand "mma_assemble_pair"
[(match_operand:POI 0 "vsx_register_operand")
(match_operand:V16QI 1 "input_operand")
(match_operand:V16QI 2 "input_operand")]
[(match_operand:OO 0 "vsx_register_operand")
(match_operand:V16QI 1 "mma_assemble_input_operand")
(match_operand:V16QI 2 "mma_assemble_input_operand")]
"TARGET_MMA"
{
rtx dst;
rtx src = gen_rtx_UNSPEC (OOmode,
gen_rtvec (2, operands[1], operands[2]),
UNSPEC_MMA_ASSEMBLE);
emit_move_insn (operands[0], src);
DONE;
})
/* Let the compiler know the code below fully defines our output value. */
emit_clobber (operands[0]);
(define_insn_and_split "*mma_assemble_pair"
[(set (match_operand:OO 0 "vsx_register_operand" "=wa")
(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
UNSPEC_MMA_ASSEMBLE))]
"TARGET_MMA"
"#"
"&& reload_completed"
[(const_int 0)]
{
rtx src = gen_rtx_UNSPEC (OOmode,
gen_rtvec (2, operands[1], operands[2]),
UNSPEC_MMA_ASSEMBLE);
rs6000_split_multireg_move (operands[0], src);
DONE;
})
dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 0);
emit_move_insn (dst, operands[1]);
dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 16);
emit_move_insn (dst, operands[2]);
(define_expand "mma_disassemble_pair"
[(match_operand:V16QI 0 "mma_disassemble_output_operand")
(match_operand:OO 1 "vsx_register_operand")
(match_operand 2 "const_0_to_1_operand")]
"TARGET_MMA"
{
rtx src;
int regoff = INTVAL (operands[2]);
src = gen_rtx_UNSPEC (V16QImode,
gen_rtvec (2, operands[1], GEN_INT (regoff)),
UNSPEC_MMA_EXTRACT);
emit_move_insn (operands[0], src);
DONE;
})
(define_insn_and_split "*mma_disassemble_pair"
[(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
(unspec:V16QI [(match_operand:OO 1 "vsx_register_operand" "wa")
(match_operand 2 "const_0_to_1_operand")]
UNSPEC_MMA_EXTRACT))]
"TARGET_MMA
&& vsx_register_operand (operands[1], OOmode)"
"#"
"&& reload_completed"
[(const_int 0)]
{
int reg = REGNO (operands[1]);
int regoff = INTVAL (operands[2]);
rtx src = gen_rtx_REG (V16QImode, reg + regoff);
emit_move_insn (operands[0], src);
DONE;
})
(define_expand "mma_assemble_acc"
[(match_operand:PXI 0 "fpr_reg_operand")
(match_operand:V16QI 1 "input_operand")
(match_operand:V16QI 2 "input_operand")
(match_operand:V16QI 3 "input_operand")
(match_operand:V16QI 4 "input_operand")]
[(match_operand:XO 0 "fpr_reg_operand")
(match_operand:V16QI 1 "mma_assemble_input_operand")
(match_operand:V16QI 2 "mma_assemble_input_operand")
(match_operand:V16QI 3 "mma_assemble_input_operand")
(match_operand:V16QI 4 "mma_assemble_input_operand")]
"TARGET_MMA"
{
rtx src = gen_rtx_UNSPEC (PXImode,
rtx src = gen_rtx_UNSPEC (XOmode,
gen_rtvec (4, operands[1], operands[2],
operands[3], operands[4]),
UNSPEC_MMA_ASSEMBLE_ACC);
UNSPEC_MMA_ASSEMBLE);
emit_move_insn (operands[0], src);
DONE;
})
(define_insn_and_split "*mma_assemble_acc"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
(unspec:PXI [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
UNSPEC_MMA_ASSEMBLE_ACC))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=d")
(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
(match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
UNSPEC_MMA_ASSEMBLE))]
"TARGET_MMA
&& fpr_reg_operand (operands[0], PXImode)"
&& fpr_reg_operand (operands[0], XOmode)"
"#"
"&& reload_completed"
[(const_int 0)]
{
rtx src = gen_rtx_UNSPEC (PXImode,
rtx src = gen_rtx_UNSPEC (XOmode,
gen_rtvec (4, operands[1], operands[2],
operands[3], operands[4]),
UNSPEC_MMA_ASSEMBLE_ACC);
UNSPEC_MMA_ASSEMBLE);
rs6000_split_multireg_move (operands[0], src);
DONE;
})
(define_expand "mma_disassemble_acc"
[(match_operand:V16QI 0 "mma_disassemble_output_operand")
(match_operand:XO 1 "fpr_reg_operand")
(match_operand 2 "const_0_to_3_operand")]
"TARGET_MMA"
{
rtx src;
int regoff = INTVAL (operands[2]);
src = gen_rtx_UNSPEC (V16QImode,
gen_rtvec (2, operands[1], GEN_INT (regoff)),
UNSPEC_MMA_EXTRACT);
emit_move_insn (operands[0], src);
DONE;
})
(define_insn_and_split "*mma_disassemble_acc"
[(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
(unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
(match_operand 2 "const_0_to_3_operand")]
UNSPEC_MMA_EXTRACT))]
"TARGET_MMA
&& fpr_reg_operand (operands[1], XOmode)"
"#"
"&& reload_completed"
[(const_int 0)]
{
int reg = REGNO (operands[1]);
int regoff = INTVAL (operands[2]);
rtx src = gen_rtx_REG (V16QImode, reg + regoff);
emit_move_insn (operands[0], src);
DONE;
})
;; MMA instructions that do not use their accumulators as an input, still
;; must not allow their vector operands to overlap the registers used by
;; the accumulator. We enforce this by marking the output as early clobber.
(define_insn "mma_<acc>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
MMA_ACC))]
"TARGET_MMA"
"<acc> %A0"
[(set_attr "type" "mma")])
;; We can't have integer constants in XOmode so we wrap this in an UNSPEC.
(define_expand "mma_xxsetaccz"
[(set (match_operand:PXI 0 "fpr_reg_operand")
[(set (match_operand:XO 0 "fpr_reg_operand")
(const_int 0))]
"TARGET_MMA"
{
emit_insn (gen_movpxi (operands[0], const0_rtx));
rtx xo0 = gen_rtx_UNSPEC (XOmode, gen_rtvec (1, const0_rtx),
UNSPEC_MMA_XXSETACCZ);
emit_insn (gen_rtx_SET (operands[0], xo0));
DONE;
})
(define_insn_and_split "*mma_xxsetaccz"
[(set (match_operand:XO 0 "fpr_reg_operand" "=d")
(unspec:XO [(match_operand 1 "const_0_to_1_operand" "O")]
UNSPEC_MMA_XXSETACCZ))]
"TARGET_MMA"
"xxsetaccz %A0"
"&& reload_completed"
[(set (match_dup 0) (unspec:XO [(match_dup 1)] UNSPEC_MMA_XXSETACCZ))]
""
[(set_attr "type" "mma")
(set_attr "length" "4")])
(define_insn "mma_<vv>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")]
MMA_VV))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")]
MMA_VV))]
"TARGET_MMA"
"<vv> %A0,%x1,%x2"
[(set_attr "type" "mma")])
(define_insn "mma_<avv>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")]
MMA_AVV))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")]
MMA_AVV))]
"TARGET_MMA"
"<avv> %A0,%x2,%x3"
[(set_attr "type" "mma")])
(define_insn "mma_<pv>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")]
MMA_PV))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")]
MMA_PV))]
"TARGET_MMA"
"<pv> %A0,%x1,%x2"
[(set_attr "type" "mma")])
(define_insn "mma_<apv>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
(match_operand:POI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")]
MMA_APV))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:OO 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")]
MMA_APV))]
"TARGET_MMA"
"<apv> %A0,%x2,%x3"
[(set_attr "type" "mma")])
(define_insn "mma_<vvi4i4i8>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "u8bit_cint_operand" "n")]
MMA_VVI4I4I8))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "u8bit_cint_operand" "n")]
MMA_VVI4I4I8))]
"TARGET_MMA"
"<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
(set_attr "length" "8")])
(define_insn "mma_<avvi4i4i8>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")
(match_operand:SI 6 "u8bit_cint_operand" "n")]
MMA_AVVI4I4I8))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")
(match_operand:SI 6 "u8bit_cint_operand" "n")]
MMA_AVVI4I4I8))]
"TARGET_MMA"
"<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
(set_attr "length" "8")])
(define_insn "mma_<vvi4i4i2>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_3_operand" "n")]
MMA_VVI4I4I2))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_3_operand" "n")]
MMA_VVI4I4I2))]
"TARGET_MMA"
"<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
(set_attr "length" "8")])
(define_insn "mma_<avvi4i4i2>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")
(match_operand:SI 6 "const_0_to_3_operand" "n")]
MMA_AVVI4I4I2))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")
(match_operand:SI 6 "const_0_to_3_operand" "n")]
MMA_AVVI4I4I2))]
"TARGET_MMA"
"<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
(set_attr "length" "8")])
(define_insn "mma_<vvi4i4>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")]
MMA_VVI4I4))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")]
MMA_VVI4I4))]
"TARGET_MMA"
"<vvi4i4> %A0,%x1,%x2,%3,%4"
[(set_attr "type" "mma")
(set_attr "length" "8")])
(define_insn "mma_<avvi4i4>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")]
MMA_AVVI4I4))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")]
MMA_AVVI4I4))]
"TARGET_MMA"
"<avvi4i4> %A0,%x2,%x3,%4,%5"
[(set_attr "type" "mma")
(set_attr "length" "8")])
(define_insn "mma_<pvi4i2>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_3_operand" "n")]
MMA_PVI4I2))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_3_operand" "n")]
MMA_PVI4I2))]
"TARGET_MMA"
"<pvi4i2> %A0,%x1,%x2,%3,%4"
[(set_attr "type" "mma")
(set_attr "length" "8")])
(define_insn "mma_<apvi4i2>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
(match_operand:POI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_3_operand" "n")]
MMA_APVI4I2))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:OO 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_3_operand" "n")]
MMA_APVI4I2))]
"TARGET_MMA"
"<apvi4i2> %A0,%x2,%x3,%4,%5"
[(set_attr "type" "mma")
(set_attr "length" "8")])
(define_insn "mma_<vvi4i4i4>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")]
MMA_VVI4I4I4))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:SI 3 "const_0_to_15_operand" "n")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")]
MMA_VVI4I4I4))]
"TARGET_MMA"
"<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
(set_attr "length" "8")])
(define_insn "mma_<avvi4i4i4>"
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")
(match_operand:SI 6 "const_0_to_15_operand" "n")]
MMA_AVVI4I4I4))]
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
(match_operand:V16QI 2 "vsx_register_operand" "wa")
(match_operand:V16QI 3 "vsx_register_operand" "wa")
(match_operand:SI 4 "const_0_to_15_operand" "n")
(match_operand:SI 5 "const_0_to_15_operand" "n")
(match_operand:SI 6 "const_0_to_15_operand" "n")]
MMA_AVVI4I4I4))]
"TARGET_MMA"
"<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
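
As a usage sketch (not part of the patch) of the pair patterns above:
assembling, storing, and disassembling a __vector_pair from C. The
store goes through movoo and splits into stxvp; names here are
illustrative.

/* Pair usage sketch; assumes -mcpu=power10.  */
#include <altivec.h>

void
pair_roundtrip (vector unsigned char hi, vector unsigned char lo,
                __vector_pair *mem, vector unsigned char out[2])
{
  __vector_pair p;                           /* OOmode, 32 bytes.  */
  __builtin_mma_assemble_pair (&p, hi, lo);  /* *mma_assemble_pair.  */
  *mem = p;                                  /* movoo -> stxvp.  */
  __builtin_mma_disassemble_pair (out, &p);  /* *mma_disassemble_pair.  */
}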

gcc/config/rs6000/predicates.md

@@ -1144,6 +1144,18 @@
(match_test "(mode == V16QImode
&& (vsx_register_operand (op, mode) || MEM_P (op)))"))
;; Return 1 if this operand is valid for an MMA disassemble insn.
(define_predicate "mma_disassemble_output_operand"
(match_code "reg,subreg,mem")
{
if (SUBREG_P (op))
op = SUBREG_REG (op);
if (!REG_P (op))
return true;
return vsx_register_operand (op, mode);
})
;; Return true if operand is an operator used in rotate-and-mask instructions.
(define_predicate "rotate_mask_operator"
(match_code "rotate,ashift,lshiftrt"))

gcc/config/rs6000/rs6000-builtin.def

@@ -352,7 +352,7 @@
| RS6000_BTC_UNARY), \
CODE_FOR_ ## ICODE) /* ICODE */
#define BU_MMA_V2(ENUM, NAME, ATTR, ICODE) \
#define BU_MMA_2(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM, /* ENUM */ \
"__builtin_mma_" NAME, /* NAME */ \
RS6000_BTM_MMA, /* MASK */ \
@@ -360,7 +360,13 @@
| RS6000_BTC_BINARY \
| RS6000_BTC_VOID \
| RS6000_BTC_GIMPLE), \
CODE_FOR_nothing) /* ICODE */
CODE_FOR_nothing) /* ICODE */ \
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM ## _INTERNAL, /* ENUM */ \
"__builtin_mma_" NAME "_internal", /* NAME */ \
RS6000_BTM_MMA, /* MASK */ \
(RS6000_BTC_ ## ATTR /* ATTR */ \
| RS6000_BTC_BINARY), \
CODE_FOR_ ## ICODE) /* ICODE */
#define BU_MMA_3(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM, /* ENUM */ \
@@ -3108,8 +3114,8 @@ BU_MMA_1 (XXMFACC, "xxmfacc", QUAD, mma_xxmfacc)
BU_MMA_1 (XXMTACC, "xxmtacc", QUAD, mma_xxmtacc)
BU_MMA_1 (XXSETACCZ, "xxsetaccz", MISC, mma_xxsetaccz)
BU_MMA_V2 (DISASSEMBLE_ACC, "disassemble_acc", QUAD, nothing)
BU_MMA_V2 (DISASSEMBLE_PAIR,"disassemble_pair", PAIR, nothing)
BU_MMA_2 (DISASSEMBLE_ACC, "disassemble_acc", QUAD, mma_disassemble_acc)
BU_MMA_2 (DISASSEMBLE_PAIR,"disassemble_pair", PAIR, mma_disassemble_pair)
BU_MMA_3 (ASSEMBLE_PAIR, "assemble_pair", MISC, mma_assemble_pair)
BU_MMA_3 (XVBF16GER2, "xvbf16ger2", MISC, mma_xvbf16ger2)
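
For illustration, the BU_MMA_2 entry for DISASSEMBLE_ACC above now
expands to two built-in records along these lines (a sketch derived
from the macro definition, with the comment columns condensed):

RS6000_BUILTIN_M (MMA_BUILTIN_DISASSEMBLE_ACC,           /* gimple-folded */
                  "__builtin_mma_disassemble_acc",
                  RS6000_BTM_MMA,
                  (RS6000_BTC_QUAD | RS6000_BTC_BINARY
                   | RS6000_BTC_VOID | RS6000_BTC_GIMPLE),
                  CODE_FOR_nothing)
RS6000_BUILTIN_M (MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL,  /* maps to insn */
                  "__builtin_mma_disassemble_acc_internal",
                  RS6000_BTM_MMA,
                  (RS6000_BTC_QUAD | RS6000_BTC_BINARY),
                  CODE_FOR_mma_disassemble_acc)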

gcc/config/rs6000/rs6000-call.c

@@ -6325,6 +6325,22 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
bool
rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
/* We do not allow MMA types being used as return values. Only report
the invalid return value usage the first time we encounter it. */
if (cfun
&& !cfun->machine->mma_return_type_error
&& TREE_TYPE (cfun->decl) == fntype
&& (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
{
/* Record we have now handled function CFUN, so the next time we
are called, we do not re-report the same error. */
cfun->machine->mma_return_type_error = true;
if (TYPE_CANONICAL (type) != NULL_TREE)
type = TYPE_CANONICAL (type);
error ("invalid use of MMA type %qs as a function return value",
IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
}
/* For the Darwin64 ABI, test if we can fit the return value in regs. */
if (TARGET_MACHO
&& rs6000_darwin64_abi
@@ -6577,30 +6593,8 @@ machine_mode
rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
machine_mode mode,
int *punsignedp ATTRIBUTE_UNUSED,
const_tree, int for_return)
const_tree, int for_return ATTRIBUTE_UNUSED)
{
/* Warning: this is a static local variable and not always NULL!
This function is called multiple times for the same function
and return value. PREV_FUNC is used to keep track of the
first time we encounter a function's return value in order
to not report an error with that return value multiple times. */
static struct function *prev_func = NULL;
/* We do not allow MMA types being used as return values. Only report
the invalid return value usage the first time we encounter it. */
if (for_return
&& prev_func != cfun
&& (mode == POImode || mode == PXImode))
{
/* Record we have now handled function CFUN, so the next time we
are called, we do not re-report the same error. */
prev_func = cfun;
if (TYPE_CANONICAL (type) != NULL_TREE)
type = TYPE_CANONICAL (type);
error ("invalid use of MMA type %qs as a function return value",
IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
}
PROMOTE_MODE (mode, *punsignedp, type);
return mode;
@@ -7552,7 +7546,7 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
int n_elts;
/* We do not allow MMA types being used as function arguments. */
if (mode == POImode || mode == PXImode)
if (mode == OOmode || mode == XOmode)
{
if (TYPE_CANONICAL (type) != NULL_TREE)
type = TYPE_CANONICAL (type);
@@ -10073,7 +10067,8 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
}
unsigned attr_args = attr & RS6000_BTC_OPND_MASK;
if (attr & RS6000_BTC_QUAD)
if (attr & RS6000_BTC_QUAD
|| fcode == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
attr_args++;
gcc_assert (nopnds == attr_args);
@@ -11687,23 +11682,24 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
gimple *new_call;
tree new_decl;
if (rs6000_builtin_info[fncode + 1].icode == CODE_FOR_nothing)
if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
|| fncode == MMA_BUILTIN_DISASSEMBLE_PAIR)
{
/* This is an MMA disassemble built-in function. */
gcc_assert (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
|| fncode == MMA_BUILTIN_DISASSEMBLE_PAIR);
push_gimplify_context (true);
unsigned nvec = (fncode == MMA_BUILTIN_DISASSEMBLE_ACC) ? 4 : 2;
tree dst_ptr = gimple_call_arg (stmt, 0);
tree src_ptr = gimple_call_arg (stmt, 1);
tree src_type = TREE_TYPE (src_ptr);
tree src = make_ssa_name (TREE_TYPE (src_type));
gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
/* If we are not disassembling an accumulator or our destination is
another accumulator, then just copy the entire thing as is. */
if (fncode != MMA_BUILTIN_DISASSEMBLE_ACC
|| TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
/* If we are not disassembling an accumulator/pair or our destination is
another accumulator/pair, then just copy the entire thing as is. */
if ((fncode == MMA_BUILTIN_DISASSEMBLE_ACC
&& TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
|| (fncode == MMA_BUILTIN_DISASSEMBLE_PAIR
&& TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node))
{
tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR,
src_type, dst_ptr));
@@ -11713,29 +11709,33 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
return true;
}
/* We're disassembling an accumulator into a different type, so we need
/* If we're disassembling an accumulator into a different type, we need
to emit a xxmfacc instruction now, since we cannot do it later. */
new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL];
new_call = gimple_build_call (new_decl, 1, src);
src = make_ssa_name (vector_quad_type_node);
gimple_call_set_lhs (new_call, src);
gimple_seq_add_stmt (&new_seq, new_call);
if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC)
{
new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL];
new_call = gimple_build_call (new_decl, 1, src);
src = make_ssa_name (vector_quad_type_node);
gimple_call_set_lhs (new_call, src);
gimple_seq_add_stmt (&new_seq, new_call);
}
/* Copy the accumulator vector by vector. */
/* Copy the accumulator/pair vector by vector. */
new_decl = rs6000_builtin_decls[fncode + 1];
tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node,
ptr_mode, true);
tree dst_base = build1 (VIEW_CONVERT_EXPR, dst_type, dst_ptr);
tree array_type = build_array_type_nelts (unsigned_V16QI_type_node, 4);
tree src_array = build1 (VIEW_CONVERT_EXPR, array_type, src);
for (unsigned i = 0; i < 4; i++)
for (unsigned i = 0; i < nvec; i++)
{
unsigned index = WORDS_BIG_ENDIAN ? i : 3 - i;
tree ref = build4 (ARRAY_REF, unsigned_V16QI_type_node, src_array,
build_int_cst (size_type_node, i),
NULL_TREE, NULL_TREE);
unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i;
tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
build_int_cst (dst_type, index * 16));
gimplify_assign (dst, ref, &new_seq);
tree dstssa = make_ssa_name (unsigned_V16QI_type_node);
new_call = gimple_build_call (new_decl, 2, src,
build_int_cstu (uint16_type_node, i));
gimple_call_set_lhs (new_call, dstssa);
gimple_seq_add_stmt (&new_seq, new_call);
gimplify_assign (dst, dstssa, &new_seq);
}
pop_gimplify_context (NULL);
gsi_replace_with_seq (gsi, new_seq, true);
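
The net effect of the folding above, rendered as C-like pseudo-gimple
(illustrative only; the real sequence is built with gimple_build_call,
and the store index order flips on little endian):

  /* __builtin_mma_disassemble_acc (dst_ptr, src_ptr), where dst is not
     itself a __vector_quad *; nvec is 4 for the acc, 2 for the pair.  */
  src = *(__vector_quad *) src_ptr;
  src = __builtin_mma_xxmfacc_internal (src);   /* deprime, acc only */
  for (i = 0; i < nvec; i++)                    /* unrolled in gimple */
    ((vector unsigned char *) dst_ptr)[WORDS_BIG_ENDIAN ? i : nvec - 1 - i]
      = __builtin_mma_disassemble_acc_internal (src, i);
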
@@ -13206,17 +13206,23 @@ rs6000_init_builtins (void)
/* Vector pair and vector quad support. */
if (TARGET_EXTRA_BUILTINS)
{
vector_pair_type_node = make_unsigned_type (256);
vector_pair_type_node = make_node (OPAQUE_TYPE);
SET_TYPE_MODE (vector_pair_type_node, OOmode);
TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
SET_TYPE_ALIGN (vector_pair_type_node, 256);
SET_TYPE_MODE (vector_pair_type_node, POImode);
layout_type (vector_pair_type_node);
TYPE_USER_ALIGN (vector_pair_type_node) = 0;
lang_hooks.types.register_builtin_type (vector_pair_type_node,
"__vector_pair");
vector_quad_type_node = make_unsigned_type (512);
vector_quad_type_node = make_node (OPAQUE_TYPE);
SET_TYPE_MODE (vector_quad_type_node, XOmode);
TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
SET_TYPE_ALIGN (vector_quad_type_node, 512);
SET_TYPE_MODE (vector_quad_type_node, PXImode);
layout_type (vector_quad_type_node);
TYPE_USER_ALIGN (vector_quad_type_node) = 0;
lang_hooks.types.register_builtin_type (vector_quad_type_node,
"__vector_quad");
}
@@ -13252,8 +13258,8 @@ rs6000_init_builtins (void)
builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
builtin_mode_to_type[POImode][1] = vector_pair_type_node;
builtin_mode_to_type[PXImode][1] = vector_quad_type_node;
builtin_mode_to_type[OOmode][1] = vector_pair_type_node;
builtin_mode_to_type[XOmode][1] = vector_quad_type_node;
tdecl = add_builtin_type ("__bool char", bool_char_type_node);
TYPE_NAME (bool_char_type_node) = tdecl;
@@ -14065,21 +14071,21 @@ mma_init_builtins (void)
}
else
{
if ((attr & RS6000_BTC_QUAD) == 0)
if (!(d->code == MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
|| d->code == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
&& (attr & RS6000_BTC_QUAD) == 0)
attr_args--;
/* Ensure we have the correct number and type of operands. */
gcc_assert (attr_args == insn_data[icode].n_operands - 1);
}
if (icode == CODE_FOR_nothing)
/* This is a disassemble pair/acc function. */
if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
|| d->code == MMA_BUILTIN_DISASSEMBLE_PAIR)
{
/* This is a disassemble MMA built-in function. */
gcc_assert (attr_args == RS6000_BTC_BINARY
&& (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
|| d->code == MMA_BUILTIN_DISASSEMBLE_PAIR));
op[nopnds++] = build_pointer_type (void_type_node);
if (attr & RS6000_BTC_QUAD)
if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC)
op[nopnds++] = build_pointer_type (vector_quad_type_node);
else
op[nopnds++] = build_pointer_type (vector_pair_type_node);
@@ -14087,13 +14093,17 @@ mma_init_builtins (void)
else
{
/* This is a normal MMA built-in function. */
unsigned j = (attr & RS6000_BTC_QUAD) ? 1 : 0;
unsigned j = 0;
if (attr & RS6000_BTC_QUAD
&& d->code != MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
&& d->code != MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
j = 1;
for (; j < (unsigned) insn_data[icode].n_operands; j++)
{
machine_mode mode = insn_data[icode].operand[j].mode;
if (gimple_func && mode == PXImode)
if (gimple_func && mode == XOmode)
op[nopnds++] = build_pointer_type (vector_quad_type_node);
else if (gimple_func && mode == POImode
else if (gimple_func && mode == OOmode
&& d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
op[nopnds++] = build_pointer_type (vector_pair_type_node);
else
@@ -14725,7 +14735,7 @@ rs6000_common_init_builtins (void)
continue;
}
if (icode == CODE_FOR_nothing)
if (icode == CODE_FOR_nothing)
{
if (TARGET_DEBUG_BUILTIN)
fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
@@ -14791,7 +14801,7 @@ rs6000_common_init_builtins (void)
continue;
}
if (icode == CODE_FOR_nothing)
if (icode == CODE_FOR_nothing)
{
if (TARGET_DEBUG_BUILTIN)
fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
@@ -14800,9 +14810,9 @@ rs6000_common_init_builtins (void)
continue;
}
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
mode2 = insn_data[icode].operand[2].mode;
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
mode2 = insn_data[icode].operand[2].mode;
type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
d->code, d->name);
@@ -14835,7 +14845,7 @@ rs6000_common_init_builtins (void)
NULL_TREE);
}
else
{
{
enum insn_code icode = d->icode;
if (d->name == 0)
{
@@ -14846,7 +14856,7 @@ rs6000_common_init_builtins (void)
continue;
}
if (icode == CODE_FOR_nothing)
if (icode == CODE_FOR_nothing)
{
if (TARGET_DEBUG_BUILTIN)
fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
@@ -14855,8 +14865,8 @@ rs6000_common_init_builtins (void)
continue;
}
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
d->code, d->name);

gcc/config/rs6000/rs6000-modes.def

@@ -83,12 +83,6 @@ VECTOR_MODE (INT, SI, 2); /* V2SI */
combination. */
PARTIAL_INT_MODE (TI, 128, PTI);
/* Define, but don't use the larger integer modes. We need an integer mode
defined that is the same size as the vector pair and vector quad modes. */
INT_MODE (OI, 32);
INT_MODE (XI, 64);
/* Modes used by __vector_pair and __vector_quad. */
PARTIAL_INT_MODE (OI, 256, POI); /* __vector_pair. */
PARTIAL_INT_MODE (XI, 512, PXI); /* __vector_quad. */
OPAQUE_MODE (OO, 32);
OPAQUE_MODE (XO, 64);
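
Given the OPAQUE_MODE sizes above and the TYPE_SIZE/TYPE_ALIGN
settings made in rs6000_init_builtins, the user-visible types should
satisfy the following (a compile-time sketch, assuming -mcpu=power10):

_Static_assert (sizeof (__vector_pair) == 32, "OOmode is 32 bytes");
_Static_assert (sizeof (__vector_quad) == 64, "XOmode is 64 bytes");
_Static_assert (_Alignof (__vector_pair) == 32, "TYPE_ALIGN of 256 bits");
_Static_assert (_Alignof (__vector_quad) == 64, "TYPE_ALIGN of 512 bits");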

gcc/config/rs6000/rs6000-string.c

@@ -2787,7 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap)
rtx src, dest;
bool move_with_length = false;
/* Use POImode for paired vsx load/store. Use V2DI for single
/* Use OOmode for paired vsx load/store. Use V2DI for single
unaligned vsx load/store, for consistency with what other
expansions (compare) already do, and so we can use lxvd2x on
p8. Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
@@ -2799,8 +2799,8 @@ expand_block_move (rtx operands[], bool might_overlap)
&& (align >= 256 || !STRICT_ALIGNMENT))
{
move_bytes = 32;
mode = POImode;
gen_func.mov = gen_movpoi;
mode = OOmode;
gen_func.mov = gen_movoo;
}
else if (TARGET_POWERPC64 && TARGET_BLOCK_OPS_UNALIGNED_VSX
&& VECTOR_MEM_VSX_P (V2DImode)
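
With the change above, a 32-byte block copy can now expand through
OOmode into a paired load/store (a sketch; assumes -mcpu=power10 and
that the vector-pair branch quoted above is taken for the target):

#include <string.h>

void
copy32 (void *dst, const void *src)
{
  /* expand_block_move: move_bytes = 32, mode = OOmode,
     gen_movoo -> lxvp + stxvp.  */
  memcpy (dst, src, 32);
}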

gcc/config/rs6000/rs6000.c

@@ -1826,15 +1826,12 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
mode = GET_MODE_INNER (mode);
/* Vector pair modes need even/odd VSX register pairs. Only allow vector
registers. We need to allow OImode to have the same registers as POImode,
even though we do not enable the move pattern for OImode. */
if (mode == POImode || mode == OImode)
registers. */
if (mode == OOmode)
return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
/* MMA accumulator modes need FPR registers divisible by 4. We need to allow
XImode to have the same registers as PXImode, even though we do not enable
the move pattern for XImode. */
if (mode == PXImode || mode == XImode)
/* MMA accumulator modes need FPR registers divisible by 4. */
if (mode == XOmode)
return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
/* PTImode can only go in GPRs. Quad word memory operations require even/odd
@@ -1941,8 +1938,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
GPR registers, and TImode can go in any GPR as well as VSX registers (PR
57744).
Similarly, don't allow POImode (vector pair, restricted to even VSX
registers) or PXImode (vector quad, restricted to FPR registers divisible
Similarly, don't allow OOmode (vector pair, restricted to even VSX
registers) or XOmode (vector quad, restricted to FPR registers divisible
by 4) to tie with other modes.
Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
@@ -1951,8 +1948,8 @@
static bool
rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
|| mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
|| mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
return mode1 == mode2;
if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2241,10 +2238,8 @@ rs6000_debug_reg_global (void)
V2DFmode,
V8SFmode,
V4DFmode,
OImode,
XImode,
POImode,
PXImode,
OOmode,
XOmode,
CCmode,
CCUNSmode,
CCEQmode,
@@ -2706,13 +2701,13 @@ rs6000_setup_reg_addr_masks (void)
since it will be broken into two vector moves. Vector quads can
only do offset loads. */
else if ((addr_mask != 0) && TARGET_MMA
&& (m2 == POImode || m2 == PXImode))
&& (m2 == OOmode || m2 == XOmode))
{
addr_mask |= RELOAD_REG_OFFSET;
if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
{
addr_mask |= RELOAD_REG_QUAD_OFFSET;
if (m2 == POImode)
if (m2 == OOmode)
addr_mask |= RELOAD_REG_INDEXED;
}
}
@@ -2921,13 +2916,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
/* Add support for vector pairs and vector quad registers. */
if (TARGET_MMA)
{
rs6000_vector_unit[POImode] = VECTOR_NONE;
rs6000_vector_mem[POImode] = VECTOR_VSX;
rs6000_vector_align[POImode] = 256;
rs6000_vector_unit[OOmode] = VECTOR_NONE;
rs6000_vector_mem[OOmode] = VECTOR_VSX;
rs6000_vector_align[OOmode] = 256;
rs6000_vector_unit[PXImode] = VECTOR_NONE;
rs6000_vector_mem[PXImode] = VECTOR_VSX;
rs6000_vector_align[PXImode] = 512;
rs6000_vector_unit[XOmode] = VECTOR_NONE;
rs6000_vector_mem[XOmode] = VECTOR_VSX;
rs6000_vector_align[XOmode] = 512;
}
/* Register class constraints for the constraints that depend on compile
@@ -3064,10 +3059,10 @@
if (TARGET_MMA)
{
reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
}
}
}
@@ -8129,8 +8124,8 @@ reg_offset_addressing_ok_p (machine_mode mode)
/* The vector pair/quad types support offset addressing if the
underlying vectors support offset addressing. */
case E_POImode:
case E_PXImode:
case E_OOmode:
case E_XOmode:
return TARGET_MMA;
case E_SDmode:
@@ -10323,11 +10318,11 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
operands[1] = force_const_mem (mode, operands[1]);
break;
case E_POImode:
case E_PXImode:
case E_OOmode:
case E_XOmode:
if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
error ("%qs is an opaque type, and you can't set it to other values.",
(mode == POImode) ? "__vector_pair" : "__vector_quad");
(mode == OOmode) ? "__vector_pair" : "__vector_quad");
break;
case E_SImode:
@@ -12596,10 +12591,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
the GPR registers. */
if (rclass == GEN_OR_FLOAT_REGS)
{
if (mode == POImode)
if (mode == OOmode)
return VSX_REGS;
if (mode == PXImode)
if (mode == XOmode)
return FLOAT_REGS;
if (GET_MODE_CLASS (mode) == MODE_INT)
@@ -16323,15 +16318,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we have a vector quad register for MMA, and this is a load or store,
see if we can use vector paired load/stores. */
if (mode == PXImode && TARGET_MMA
if (mode == XOmode && TARGET_MMA
&& (MEM_P (dst) || MEM_P (src)))
{
reg_mode = POImode;
reg_mode = OOmode;
nregs /= 2;
}
/* If we have a vector pair/quad mode, split it into two/four separate
vectors. */
else if (mode == POImode || mode == PXImode)
else if (mode == OOmode || mode == XOmode)
reg_mode = V1TImode;
else if (FP_REGNO_P (reg))
reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -16377,12 +16372,16 @@ rs6000_split_multireg_move (rtx dst, rtx src)
return;
}
/* The __vector_pair and __vector_quad modes are multi-register modes,
so if have to load or store the registers, we have to be careful to
properly swap them if we're in little endian mode below. This means
the last register gets the first memory location. */
if (mode == POImode || mode == PXImode)
/* The __vector_pair and __vector_quad modes are multi-register
modes, so if we have to load or store the registers, we have to be
careful to properly swap them if we're in little endian mode
below. This means the last register gets the first memory
location. We also need to be careful of using the right register
numbers if we are splitting XO to OO. */
if (mode == OOmode || mode == XOmode)
{
nregs = hard_regno_nregs (reg, mode);
int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
if (MEM_P (dst))
{
unsigned offset = 0;
@@ -16391,15 +16390,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
if (TARGET_MMA
&& GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
for (int i = 0; i < nregs; i++)
for (int i = 0; i < nregs; i += reg_mode_nregs)
{
unsigned subreg = (WORDS_BIG_ENDIAN)
? i * size : (nregs - 1 - i) * size;
unsigned subreg =
(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
rtx dst2 = adjust_address (dst, reg_mode, offset);
rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg);
rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
offset += size;
emit_insn (gen_rtx_SET (dst2, src2));
}
@@ -16412,11 +16411,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
unsigned offset = 0;
unsigned size = GET_MODE_SIZE (reg_mode);
for (int i = 0; i < nregs; i++)
for (int i = 0; i < nregs; i += reg_mode_nregs)
{
unsigned subreg = (WORDS_BIG_ENDIAN)
? i * size : (nregs - 1 - i) * size;
rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg);
unsigned subreg =
(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
rtx src2 = adjust_address (src, reg_mode, offset);
offset += size;
emit_insn (gen_rtx_SET (dst2, src2));
@@ -16425,7 +16424,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
if (TARGET_MMA
&& GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
return;
@@ -16433,9 +16432,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
if (GET_CODE (src) == UNSPEC)
{
gcc_assert (REG_P (dst)
&& FP_REGNO_P (REGNO (dst))
&& XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
gcc_assert (REG_P (dst));
if (GET_MODE (src) == XOmode)
gcc_assert (FP_REGNO_P (REGNO (dst)));
if (GET_MODE (src) == OOmode)
gcc_assert (VSX_REGNO_P (REGNO (dst)));
reg_mode = GET_MODE (XVECEXP (src, 0, 0));
for (int i = 0; i < XVECLEN (src, 0); i++)
@@ -16446,7 +16448,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* We are writing an accumulator register, so we have to
prime it after we've written it. */
emit_insn (gen_mma_xxmtacc (dst, dst));
if (GET_MODE (src) == XOmode)
emit_insn (gen_mma_xxmtacc (dst, dst));
return;
}
@@ -16459,22 +16462,35 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
if (TARGET_MMA
&& GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
/* Move register range backwards, if we might have destructive
overlap. */
int i;
for (i = nregs - 1; i >= 0; i--)
emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
i * reg_mode_size),
simplify_gen_subreg (reg_mode, src, mode,
i * reg_mode_size)));
/* XO/OO are opaque so cannot use subregs. */
if (mode == OOmode || mode == XOmode )
{
for (i = nregs - 1; i >= 0; i--)
{
rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
emit_insn (gen_rtx_SET (dst_i, src_i));
}
}
else
{
for (i = nregs - 1; i >= 0; i--)
emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
i * reg_mode_size),
simplify_gen_subreg (reg_mode, src, mode,
i * reg_mode_size)));
}
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
if (TARGET_MMA
&& GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
}
else
@@ -16611,7 +16627,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
if (TARGET_MMA && REG_P (src)
&& GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
for (i = 0; i < nregs; i++)
@@ -16626,16 +16642,24 @@ rs6000_split_multireg_move (rtx dst, rtx src)
if (j == 0 && used_update)
continue;
emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
j * reg_mode_size),
simplify_gen_subreg (reg_mode, src, mode,
j * reg_mode_size)));
/* XO/OO are opaque so cannot use subregs. */
if (mode == OOmode || mode == XOmode )
{
rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
emit_insn (gen_rtx_SET (dst_i, src_i));
}
else
emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
j * reg_mode_size),
simplify_gen_subreg (reg_mode, src, mode,
j * reg_mode_size)));
}
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
if (TARGET_MMA && REG_P (dst)
&& GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
if (restore_basereg != NULL_RTX)
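
Concretely, after this change an XOmode store is deprimed with
xxmfacc and then split into two OOmode (stxvp) stores, and XOmode or
OOmode register-to-register copies use hard registers directly
instead of subregs. A sketch of the store case (register and offset
numbers illustrative):

/* Storing an accumulator; assumes -mcpu=power10.  */
void
store_acc (__vector_quad *dst, __vector_quad *src)
{
  *dst = *src;   /* The store half of this movxo splits roughly into:
                      xxmfacc  accN           ; deprime the source
                      stxvp    vsN,0(r3)      ; two OOmode stores
                      stxvp    vsN+2,32(r3)  */
}
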
@@ -19865,7 +19889,8 @@ rs6000_mangle_type (const_tree type)
type = TYPE_MAIN_VARIANT (type);
if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
&& TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
&& TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
&& TREE_CODE (type) != OPAQUE_TYPE)
return NULL;
if (type == bool_char_type_node) return "U6__boolc";
@@ -21753,6 +21778,14 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
}
break;
case UNSPEC:
if (XINT (x, 1) == UNSPEC_MMA_XXSETACCZ)
{
*total = 0;
return true;
}
break;
default:
break;
}
@@ -27186,14 +27219,14 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
if (frommode != tomode)
{
/* Do not allow conversions to/from PXImode and POImode types. */
if (frommode == PXImode)
/* Do not allow conversions to/from XOmode and OOmode types. */
if (frommode == XOmode)
return N_("invalid conversion from type %<__vector_quad%>");
if (tomode == PXImode)
if (tomode == XOmode)
return N_("invalid conversion to type %<__vector_quad%>");
if (frommode == POImode)
if (frommode == OOmode)
return N_("invalid conversion from type %<__vector_pair%>");
if (tomode == POImode)
if (tomode == OOmode)
return N_("invalid conversion to type %<__vector_pair%>");
}
else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
@@ -27202,19 +27235,19 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
frommode = TYPE_MODE (TREE_TYPE (fromtype));
tomode = TYPE_MODE (TREE_TYPE (totype));
/* Do not allow conversions to/from PXImode and POImode pointer
/* Do not allow conversions to/from XOmode and OOmode pointer
types, except to/from void pointers. */
if (frommode != tomode
&& frommode != VOIDmode
&& tomode != VOIDmode)
{
if (frommode == PXImode)
if (frommode == XOmode)
return N_("invalid conversion from type %<* __vector_quad%>");
if (tomode == PXImode)
if (tomode == XOmode)
return N_("invalid conversion to type %<* __vector_quad%>");
if (frommode == POImode)
if (frommode == OOmode)
return N_("invalid conversion from type %<* __vector_pair%>");
if (tomode == POImode)
if (tomode == OOmode)
return N_("invalid conversion to type %<* __vector_pair%>");
}
}
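
The diagnostics above reject code like the following sketch
(mirroring the pr96506 tests; conversions to and from void * stay
allowed):

/* Conversions rejected by rs6000_invalid_conversion.  */
void
bad_casts (__vector_quad *q)
{
  __vector_pair *p = (__vector_pair *) q;  /* error: invalid conversion
                                              to type '* __vector_pair' */
  void *v = q;                             /* OK: void * is permitted.  */
  (void) p;
  (void) v;
}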

gcc/config/rs6000/rs6000.h

@@ -1041,7 +1041,7 @@ enum data_align { align_abi, align_opt, align_both };
/* Modes that are not vectors, but require vector alignment. Treat these like
vectors in terms of loads and stores. */
#define VECTOR_ALIGNMENT_P(MODE) \
(FLOAT128_VECTOR_P (MODE) || (MODE) == POImode || (MODE) == PXImode)
(FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
#define ALTIVEC_VECTOR_MODE(MODE) \
((MODE) == V16QImode \
@@ -2556,6 +2556,7 @@ typedef struct GTY(()) machine_function
bool fpr_is_wrapped_separately[32];
bool lr_is_wrapped_separately;
bool toc_is_wrapped_separately;
bool mma_return_type_error;
} machine_function;
#endif

gcc/config/rs6000/rs6000.md

@@ -778,7 +778,7 @@
;; supplement addressing modes.
(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
SF SD SI DF DD DI TI PTI KF IF TF
POI PXI])
OO XO])
;; Iterate over smin, smax
(define_code_iterator fp_minmax [smin smax])

gcc/testsuite/gcc.target/powerpc/mma-double-test.c

@@ -181,6 +181,9 @@ main (int argc, char *argv[])
printf ("MMA double test fail: %d errors\n",ret);
else
printf ("MMA single test success: 0 MMA errors\n");
#else
if (ret)
abort();
#endif
return ret;

gcc/testsuite/gcc.target/powerpc/mma-single-test.c

@@ -189,6 +189,9 @@ main (int argc, char *argv[])
printf ("MMA single test fail: %d errors\n",ret);
else
printf ("MMA single test success: 0 MMA errors\n");
#else
if (ret)
abort();
#endif
return ret;

gcc/testsuite/gcc.target/powerpc/pr96506-1.c (renamed from pr96506.c)

@@ -40,27 +40,3 @@ foo3 (void)
vquad_t v;
bar3 (v); /* { dg-error "invalid use of MMA operand of type .__vector_quad. as a function parameter" } */
}
__vector_pair
foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
{
return *src;
}
vpair_t
foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
{
return *src;
}
__vector_quad
foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
{
return *src;
}
vquad_t
foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
{
return *src;
}

gcc/testsuite/gcc.target/powerpc/pr96506-2.c (new file)

@@ -0,0 +1,38 @@
/* PR target/96506 */
/* { dg-do compile } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
extern void bar0();
extern void bar1();
extern void bar2();
extern void bar3();
typedef __vector_pair vpair_t;
typedef __vector_quad vquad_t;
/* Verify we flag errors on the following. */
__vector_pair
foo4 (__vector_pair *src)
{ /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
return *src;
}
vpair_t
foo5 (vpair_t *src)
{ /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
return *src;
}
__vector_quad
foo6 (__vector_quad *src)
{ /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
return *src;
}
vquad_t
foo7 (vquad_t *src)
{ /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
return *src;
}