mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 13:41:18 +08:00
[AArch32] ACLE intrinsics bfloat16 vmmla and vfma<b/t> for AArch32 AdvSIMD
Commit rest of the 43031fbdda7d4edbd607365a4f3bbec069fe3983 content. I screwed up on the "git add" commands there.
This commit is contained in:
parent
9412b35aff
commit
2d22ab64c4
@ -1,3 +1,31 @@
|
||||
2020-03-05 Delia Burduv <delia.burduv@arm.com>
|
||||
|
||||
* config/arm/arm_neon.h (vbfmmlaq_f32): New.
|
||||
(vbfmlalbq_f32): New.
|
||||
(vbfmlaltq_f32): New.
|
||||
(vbfmlalbq_lane_f32): New.
|
||||
(vbfmlaltq_lane_f32): New.
|
||||
(vbfmlalbq_laneq_f32): New.
|
||||
(vbfmlaltq_laneq_f32): New.
|
||||
* config/arm/arm_neon_builtins.def (vmmla): New.
|
||||
(vfmab): New.
|
||||
(vfmat): New.
|
||||
(vfmab_lane): New.
|
||||
(vfmat_lane): New.
|
||||
(vfmab_laneq): New.
|
||||
(vfmat_laneq): New.
|
||||
* config/arm/iterators.md (BF_MA): New int iterator.
|
||||
(bt): New int attribute.
|
||||
(VQXBF): Copy of VQX with V8BF.
|
||||
* config/arm/neon.md (neon_vmmlav8bf): New insn.
|
||||
(neon_vfma<bt>v8bf): New insn.
|
||||
(neon_vfma<bt>_lanev8bf): New insn.
|
||||
(neon_vfma<bt>_laneqv8bf): New expand.
|
||||
(neon_vget_high<mode>): Changed iterator to VQXBF.
|
||||
* config/arm/unspecs.md (UNSPEC_BFMMLA): New UNSPEC.
|
||||
(UNSPEC_BFMAB): New UNSPEC.
|
||||
(UNSPEC_BFMAT): New UNSPEC.
|
||||
|
||||
2020-03-05 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR middle-end/93399
|
||||
|
@ -19426,6 +19426,59 @@ vcvtq_high_bf16_f32 (bfloat16x8_t inactive, float32x4_t __a)
|
||||
return __builtin_neon_vbfcvtv4sf_highv8bf (inactive, __a);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vbfmmlaq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b)
|
||||
{
|
||||
return __builtin_neon_vmmlav8bf (__r, __a, __b);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vbfmlalbq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b)
|
||||
{
|
||||
return __builtin_neon_vfmabv8bf (__r, __a, __b);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vbfmlaltq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b)
|
||||
{
|
||||
return __builtin_neon_vfmatv8bf (__r, __a, __b);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vbfmlalbq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b,
|
||||
const int __index)
|
||||
{
|
||||
return __builtin_neon_vfmab_lanev8bf (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vbfmlaltq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b,
|
||||
const int __index)
|
||||
{
|
||||
return __builtin_neon_vfmat_lanev8bf (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vbfmlalbq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
|
||||
const int __index)
|
||||
{
|
||||
return __builtin_neon_vfmab_laneqv8bf (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
|
||||
const int __index)
|
||||
{
|
||||
return __builtin_neon_vfmat_laneqv8bf (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -391,3 +391,12 @@ VAR2 (UNOP, vbfcvt, v4bf, v8bf)
|
||||
VAR1 (UNOP, vbfcvt_high, v8bf)
|
||||
VAR2 (UNOP, vbfcvtv4sf, v4bf, v8bf)
|
||||
VAR1 (BINOP, vbfcvtv4sf_high, v8bf)
|
||||
|
||||
VAR1 (TERNOP, vmmla, v8bf)
|
||||
|
||||
VAR1 (TERNOP, vfmab, v8bf)
|
||||
VAR1 (TERNOP, vfmat, v8bf)
|
||||
VAR1 (MAC_LANE, vfmab_lane, v8bf)
|
||||
VAR1 (MAC_LANE, vfmat_lane, v8bf)
|
||||
VAR1 (MAC_LANE, vfmab_laneq, v8bf)
|
||||
VAR1 (MAC_LANE, vfmat_laneq, v8bf)
|
||||
|
@ -106,6 +106,9 @@
|
||||
;; Quad-width vector modes plus 64-bit elements.
|
||||
(define_mode_iterator VQX [V16QI V8HI V8HF V8BF V4SI V4SF V2DI])
|
||||
|
||||
;; Quad-width vector modes plus 64-bit elements and V8BF.
|
||||
(define_mode_iterator VQXBF [V16QI V8HI V8HF (V8BF "TARGET_BF16_SIMD") V4SI V4SF V2DI])
|
||||
|
||||
;; Quad-width vector modes without floating-point elements.
|
||||
(define_mode_iterator VQI [V16QI V8HI V4SI])
|
||||
|
||||
@ -493,6 +496,8 @@
|
||||
|
||||
(define_int_iterator MATMUL [UNSPEC_MATMUL_S UNSPEC_MATMUL_U UNSPEC_MATMUL_US])
|
||||
|
||||
(define_int_iterator BF_MA [UNSPEC_BFMAB UNSPEC_BFMAT])
|
||||
|
||||
;;----------------------------------------------------------------------------
|
||||
;; Mode attributes
|
||||
;;----------------------------------------------------------------------------
|
||||
@ -1209,3 +1214,6 @@
|
||||
])
|
||||
|
||||
(define_int_attr smlaw_op [(UNSPEC_SMLAWB "smlawb") (UNSPEC_SMLAWT "smlawt")])
|
||||
|
||||
;; An iterator for VFMA<bt>
|
||||
(define_int_attr bt [(UNSPEC_BFMAB "b") (UNSPEC_BFMAT "t")])
|
||||
|
@ -3924,7 +3924,7 @@ if (BYTES_BIG_ENDIAN)
|
||||
|
||||
(define_expand "neon_vget_high<mode>"
|
||||
[(match_operand:<V_HALF> 0 "s_register_operand")
|
||||
(match_operand:VQX 1 "s_register_operand")]
|
||||
(match_operand:VQXBF 1 "s_register_operand")]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
emit_move_insn (operands[0],
|
||||
@ -6737,3 +6737,64 @@ if (BYTES_BIG_ENDIAN)
|
||||
"TARGET_BF16_FP"
|
||||
""
|
||||
)
|
||||
|
||||
(define_insn "neon_vmmlav8bf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=w")
|
||||
(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
|
||||
(match_operand:V8BF 3 "register_operand" "w")]
|
||||
UNSPEC_BFMMLA)))]
|
||||
"TARGET_BF16_SIMD"
|
||||
"vmmla.bf16\\t%q0, %q2, %q3"
|
||||
[(set_attr "type" "neon_fp_mla_s_q")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vfma<bt>v8bf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=w")
|
||||
(plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
|
||||
(match_operand:V8BF 3 "register_operand" "w")]
|
||||
BF_MA)))]
|
||||
"TARGET_BF16_SIMD"
|
||||
"vfma<bt>.bf16\\t%q0, %q2, %q3"
|
||||
[(set_attr "type" "neon_fp_mla_s_q")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vfma<bt>_lanev8bf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=w")
|
||||
(plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
|
||||
(match_operand:V4BF 3 "register_operand" "x")
|
||||
(match_operand:SI 4 "const_int_operand" "n")]
|
||||
BF_MA)))]
|
||||
"TARGET_BF16_SIMD"
|
||||
"vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
|
||||
[(set_attr "type" "neon_fp_mla_s_scalar_q")]
|
||||
)
|
||||
|
||||
(define_expand "neon_vfma<bt>_laneqv8bf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=w")
|
||||
(plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
|
||||
(match_operand:V8BF 3 "register_operand" "x")
|
||||
(match_operand:SI 4 "const_int_operand" "n")]
|
||||
BF_MA)))]
|
||||
"TARGET_BF16_SIMD"
|
||||
{
|
||||
int lane = INTVAL (operands[4]);
|
||||
gcc_assert (IN_RANGE(lane, 0, 7));
|
||||
if (lane < 4)
|
||||
{
|
||||
emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], operands[3], operands[4]));
|
||||
}
|
||||
else
|
||||
{
|
||||
rtx op_highpart = gen_reg_rtx (V4BFmode);
|
||||
emit_insn (gen_neon_vget_highv8bf (op_highpart, operands[3]));
|
||||
operands[4] = GEN_INT (lane - 4);
|
||||
emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], op_highpart, operands[4]));
|
||||
}
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "neon_fp_mla_s_scalar_q")]
|
||||
)
|
||||
|
@ -508,4 +508,7 @@
|
||||
UNSPEC_MATMUL_US
|
||||
UNSPEC_BFCVT
|
||||
UNSPEC_BFCVT_HIGH
|
||||
UNSPEC_BFMMLA
|
||||
UNSPEC_BFMAB
|
||||
UNSPEC_BFMAT
|
||||
])
|
||||
|
Loading…
x
Reference in New Issue
Block a user