Mirror of git://gcc.gnu.org/git/gcc.git
ACLE intrinsics: BFloat16 load intrinsics for AArch32
2020-03-06  Delia Burduv  <delia.burduv@arm.com>

	* config/arm/arm_neon.h (vld2_bf16): New.
	(vld2q_bf16): New.
	(vld3_bf16): New.
	(vld3q_bf16): New.
	(vld4_bf16): New.
	(vld4q_bf16): New.
	(vld2_dup_bf16): New.
	(vld2q_dup_bf16): New.
	(vld3_dup_bf16): New.
	(vld3q_dup_bf16): New.
	(vld4_dup_bf16): New.
	(vld4q_dup_bf16): New.
	* config/arm/arm_neon_builtins.def
	(vld2): Changed to VAR13 and added v4bf, v8bf.
	(vld2_dup): Changed to VAR8 and added v4bf, v8bf.
	(vld3): Changed to VAR13 and added v4bf, v8bf.
	(vld3_dup): Changed to VAR8 and added v4bf, v8bf.
	(vld4): Changed to VAR13 and added v4bf, v8bf.
	(vld4_dup): Changed to VAR8 and added v4bf, v8bf.
	* config/arm/iterators.md (VDXBF2): New iterator.
	* config/arm/neon.md (neon_vld2): Use new iterators.
	(neon_vld2_dup<mode>): Use new iterators.
	(neon_vld3<mode>): Likewise.
	(neon_vld3qa<mode>): Likewise.
	(neon_vld3qb<mode>): Likewise.
	(neon_vld3_dup<mode>): Likewise.
	(neon_vld4<mode>): Likewise.
	(neon_vld4qa<mode>): Likewise.
	(neon_vld4qb<mode>): Likewise.
	(neon_vld4_dup<mode>): Likewise.
	(neon_vld2_dupv8bf): New.
	(neon_vld3_dupv8bf): Likewise.
	(neon_vld4_dupv8bf): Likewise.
	* gcc.target/arm/simd/bf16_vldn_1.c: New test.
parent ff22937572
commit eb637e7604
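Editorial note, not part of the commit: the sketch below shows how the intrinsics added by this patch are meant to be used from C. It is illustrative only; the function names and the buffer argument are made up, and it assumes a compiler and flags that provide Armv8.2-A BF16 NEON support (the test at the end of this patch obtains such flags through the arm_v8_2a_bf16_neon DejaGnu options).

#include <arm_neon.h>

/* Illustrative use of the de-interleaving and duplicating BF16 loads.
   "data" points at a buffer of bfloat16_t values (at least eight for the
   vld2 case, four are enough for vld4_dup).  */
bfloat16x4x2_t
deinterleave_pairs (const bfloat16_t *data)
{
  /* vld2 de-interleaves: val[0] receives data[0], data[2], data[4], data[6];
     val[1] receives data[1], data[3], data[5], data[7].  */
  return vld2_bf16 (data);
}

bfloat16x4x4_t
broadcast_quad (const bfloat16_t *data)
{
  /* vld4_dup reads data[0..3] and broadcasts each element across one
     D register: val[n] holds four copies of data[n].  */
  return vld4_dup_bf16 (data);
}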
gcc/ChangeLog
@@ -1,3 +1,39 @@
+2020-03-06  Delia Burduv  <delia.burduv@arm.com>
+
+	* config/arm/arm_neon.h (vld2_bf16): New.
+	(vld2q_bf16): New.
+	(vld3_bf16): New.
+	(vld3q_bf16): New.
+	(vld4_bf16): New.
+	(vld4q_bf16): New.
+	(vld2_dup_bf16): New.
+	(vld2q_dup_bf16): New.
+	(vld3_dup_bf16): New.
+	(vld3q_dup_bf16): New.
+	(vld4_dup_bf16): New.
+	(vld4q_dup_bf16): New.
+	* config/arm/arm_neon_builtins.def
+	(vld2): Changed to VAR13 and added v4bf, v8bf.
+	(vld2_dup): Changed to VAR8 and added v4bf, v8bf.
+	(vld3): Changed to VAR13 and added v4bf, v8bf.
+	(vld3_dup): Changed to VAR8 and added v4bf, v8bf.
+	(vld4): Changed to VAR13 and added v4bf, v8bf.
+	(vld4_dup): Changed to VAR8 and added v4bf, v8bf.
+	* config/arm/iterators.md (VDXBF2): New iterator.
+	* config/arm/neon.md (neon_vld2): Use new iterators.
+	(neon_vld2_dup<mode>): Use new iterators.
+	(neon_vld3<mode>): Likewise.
+	(neon_vld3qa<mode>): Likewise.
+	(neon_vld3qb<mode>): Likewise.
+	(neon_vld3_dup<mode>): Likewise.
+	(neon_vld4<mode>): Likewise.
+	(neon_vld4qa<mode>): Likewise.
+	(neon_vld4qb<mode>): Likewise.
+	(neon_vld4_dup<mode>): Likewise.
+	(neon_vld2_dupv8bf): New.
+	(neon_vld3_dupv8bf): Likewise.
+	(neon_vld4_dupv8bf): Likewise.
+
 2020-03-06  Delia Burduv  <delia.burduv@arm.com>
 
 	* config/arm/arm_neon.h (bfloat16x4x2_t): New typedef.
gcc/config/arm/arm_neon.h
@@ -19557,6 +19557,114 @@ vst4q_bf16 (bfloat16_t * __ptr, bfloat16x8x4_t __val)
   return __builtin_neon_vst4v8bf (__ptr, __bu.__o);
 }
 
+__extension__ extern __inline bfloat16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld2_bf16 (bfloat16_t const * __ptr)
+{
+  union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vld2v4bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
+__extension__ extern __inline bfloat16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld2q_bf16 (const bfloat16_t * __ptr)
+{
+  union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vld2v8bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
+__extension__ extern __inline bfloat16x4x3_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld3_bf16 (const bfloat16_t * __ptr)
+{
+  union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+  __rv.__o = __builtin_neon_vld3v4bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
+__extension__ extern __inline bfloat16x8x3_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld3q_bf16 (const bfloat16_t * __ptr)
+{
+  union { bfloat16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+  __rv.__o = __builtin_neon_vld3v8bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
+__extension__ extern __inline bfloat16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld4_bf16 (const bfloat16_t * __ptr)
+{
+  union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vld4v4bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
+__extension__ extern __inline bfloat16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld4q_bf16 (const bfloat16_t * __ptr)
+{
+  union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+  __rv.__o = __builtin_neon_vld4v8bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
+__extension__ extern __inline bfloat16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld2_dup_bf16 (const bfloat16_t * __ptr)
+{
+  union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vld2_dupv4bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
+__extension__ extern __inline bfloat16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld2q_dup_bf16 (const bfloat16_t * __ptr)
+{
+  union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vld2_dupv8bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
+__extension__ extern __inline bfloat16x4x3_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld3_dup_bf16 (const bfloat16_t * __ptr)
+{
+  union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+  __rv.__o = __builtin_neon_vld3_dupv4bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
+__extension__ extern __inline bfloat16x8x3_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld3q_dup_bf16 (const bfloat16_t * __ptr)
+{
+  union { bfloat16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+  __rv.__o = __builtin_neon_vld3_dupv8bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
+__extension__ extern __inline bfloat16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld4_dup_bf16 (const bfloat16_t * __ptr)
+{
+  union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vld4_dupv4bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
+__extension__ extern __inline bfloat16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld4q_dup_bf16 (const bfloat16_t * __ptr)
+{
+  union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+  __rv.__o = __builtin_neon_vld4_dupv8bf ((const __builtin_neon_hi *) __ptr);
+  return __rv.__i;
+}
+
 #pragma GCC pop_options
 
 #ifdef __cplusplus
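Editorial note, not part of the patch: each wrapper above pairs the public NEON structure type with the opaque integer mode returned by its builtin, and the mode is simply the one whose size matches the structure (TI/OI/EI/CI/XI are the 128/256/192/384/512-bit opaque modes). A small compile-time sketch of that size bookkeeping, assuming a BF16-capable arm_neon.h:

#include <arm_neon.h>

/* Size bookkeeping behind the unions above (illustrative only).  */
_Static_assert (sizeof (bfloat16x4x2_t) == 16, "2 x 64 bits  -> __builtin_neon_ti");
_Static_assert (sizeof (bfloat16x8x2_t) == 32, "2 x 128 bits -> __builtin_neon_oi");
_Static_assert (sizeof (bfloat16x4x3_t) == 24, "3 x 64 bits  -> __builtin_neon_ei");
_Static_assert (sizeof (bfloat16x8x3_t) == 48, "3 x 128 bits -> __builtin_neon_ci");
_Static_assert (sizeof (bfloat16x4x4_t) == 32, "4 x 64 bits  -> __builtin_neon_oi");
_Static_assert (sizeof (bfloat16x8x4_t) == 64, "4 x 128 bits -> __builtin_neon_xi");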
gcc/config/arm/arm_neon_builtins.def
@@ -320,29 +320,29 @@ VAR12 (STORE1, vst1,
 	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
 VAR12 (STORE1LANE, vst1_lane,
 	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
-VAR11 (LOAD1, vld2,
-	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR13 (LOAD1, vld2,
+	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
 VAR9 (LOAD1LANE, vld2_lane,
 	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
-VAR6 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR8 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
 VAR13 (STORE1, vst2,
 	v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
 VAR9 (STORE1LANE, vst2_lane,
 	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
-VAR11 (LOAD1, vld3,
-	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR13 (LOAD1, vld3,
+	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
 VAR9 (LOAD1LANE, vld3_lane,
 	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
-VAR6 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR8 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
 VAR13 (STORE1, vst3,
 	v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
 VAR9 (STORE1LANE, vst3_lane,
 	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
-VAR11 (LOAD1, vld4,
-	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR13 (LOAD1, vld4,
+	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
 VAR9 (LOAD1LANE, vld4_lane,
 	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
-VAR6 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR8 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
 VAR13 (STORE1, vst4,
 	v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
 VAR9 (STORE1LANE, vst4_lane,
gcc/config/arm/iterators.md
@@ -87,6 +87,9 @@
 ;; Double-width vector modes plus 64-bit elements, including V4BF.
 (define_mode_iterator VDXBF [V8QI V4HI V4HF (V4BF "TARGET_BF16_SIMD") V2SI V2SF DI])
 
+;; Double-width vector modes plus 64-bit elements, V4BF and V8BF.
+(define_mode_iterator VDXBF2 [V8QI V4HI V4HF V2SI V2SF DI (V4BF "TARGET_BF16_SIMD") (V8BF ("TARGET_BF16_SIMD"))])
+
 ;; Double-width vector modes plus 64-bit elements,
 ;; with V4BFmode added, suitable for moves.
 (define_mode_iterator VDXMOV [V8QI V4HI V4HF V4BF V2SI V2SF DI])
gcc/config/arm/neon.md
@@ -5428,7 +5428,7 @@ if (BYTES_BIG_ENDIAN)
 (define_insn "neon_vld2<mode>"
   [(set (match_operand:TI 0 "s_register_operand" "=w")
         (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
-                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD2))]
   "TARGET_NEON"
 {
@@ -5453,7 +5453,7 @@ if (BYTES_BIG_ENDIAN)
 (define_insn "neon_vld2<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
-                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD2))]
   "TARGET_NEON"
   "vld2.<V_sz_elem>\t%h0, %A1"
@@ -5516,7 +5516,7 @@ if (BYTES_BIG_ENDIAN)
 (define_insn "neon_vld2_dup<mode>"
   [(set (match_operand:TI 0 "s_register_operand" "=w")
         (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
-                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD2_DUP))]
   "TARGET_NEON"
 {
@@ -5531,6 +5531,27 @@ if (BYTES_BIG_ENDIAN)
                     (const_string "neon_load1_1reg<q>")))]
 )
 
+(define_insn "neon_vld2_dupv8bf"
+  [(set (match_operand:OI 0 "s_register_operand" "=w")
+        (unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
+                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   UNSPEC_VLD2_DUP))]
+  "TARGET_BF16_SIMD"
+{
+  rtx ops[5];
+  int tabbase = REGNO (operands[0]);
+
+  ops[4] = operands[1];
+  ops[0] = gen_rtx_REG (V4BFmode, tabbase);
+  ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
+  ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
+  ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
+  output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", ops);
+  return "";
+}
+  [(set_attr "type" "neon_load2_all_lanes_q")]
+)
+
 (define_expand "vec_store_lanesti<mode>"
   [(set (match_operand:TI 0 "neon_struct_operand")
 	(unspec:TI [(match_operand:TI 1 "s_register_operand")
@@ -5637,7 +5658,7 @@ if (BYTES_BIG_ENDIAN)
 (define_insn "neon_vld3<mode>"
   [(set (match_operand:EI 0 "s_register_operand" "=w")
         (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
-                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD3))]
   "TARGET_NEON"
 {
@@ -5665,7 +5686,7 @@ if (BYTES_BIG_ENDIAN)
 (define_expand "neon_vld3<mode>"
   [(match_operand:CI 0 "s_register_operand")
    (match_operand:CI 1 "neon_struct_operand")
-   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
   rtx mem;
@@ -5680,7 +5701,7 @@ if (BYTES_BIG_ENDIAN)
 (define_insn "neon_vld3qa<mode>"
   [(set (match_operand:CI 0 "s_register_operand" "=w")
         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
-                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD3A))]
   "TARGET_NEON"
 {
@@ -5700,7 +5721,7 @@ if (BYTES_BIG_ENDIAN)
   [(set (match_operand:CI 0 "s_register_operand" "=w")
         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                     (match_operand:CI 2 "s_register_operand" "0")
-                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD3B))]
   "TARGET_NEON"
 {
@@ -5777,7 +5798,7 @@ if (BYTES_BIG_ENDIAN)
 (define_insn "neon_vld3_dup<mode>"
   [(set (match_operand:EI 0 "s_register_operand" "=w")
         (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
-                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD3_DUP))]
   "TARGET_NEON"
 {
@@ -5800,6 +5821,26 @@ if (BYTES_BIG_ENDIAN)
                     (const_string "neon_load3_all_lanes<q>")
                     (const_string "neon_load1_1reg<q>")))])
 
+(define_insn "neon_vld3_dupv8bf"
+  [(set (match_operand:CI 0 "s_register_operand" "=w")
+        (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
+                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   UNSPEC_VLD2_DUP))]
+  "TARGET_BF16_SIMD"
+{
+  rtx ops[4];
+  int tabbase = REGNO (operands[0]);
+
+  ops[3] = operands[1];
+  ops[0] = gen_rtx_REG (V4BFmode, tabbase);
+  ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
+  ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
+  output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
+  return "";
+}
+  [(set_attr "type" "neon_load3_all_lanes_q")]
+)
+
 (define_expand "vec_store_lanesei<mode>"
   [(set (match_operand:EI 0 "neon_struct_operand")
 	(unspec:EI [(match_operand:EI 1 "s_register_operand")
@@ -5955,7 +5996,7 @@ if (BYTES_BIG_ENDIAN)
 (define_insn "neon_vld4<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
-                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD4))]
   "TARGET_NEON"
 {
@@ -5983,7 +6024,7 @@ if (BYTES_BIG_ENDIAN)
 (define_expand "neon_vld4<mode>"
   [(match_operand:XI 0 "s_register_operand")
    (match_operand:XI 1 "neon_struct_operand")
-   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
   rtx mem;
@@ -5998,7 +6039,7 @@ if (BYTES_BIG_ENDIAN)
 (define_insn "neon_vld4qa<mode>"
   [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
-                   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
   "TARGET_NEON"
 {
@@ -6019,7 +6060,7 @@ if (BYTES_BIG_ENDIAN)
   [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
-                   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
   "TARGET_NEON"
 {
@@ -6099,7 +6140,7 @@ if (BYTES_BIG_ENDIAN)
 (define_insn "neon_vld4_dup<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
-                   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
   "TARGET_NEON"
 {
@@ -6125,6 +6166,27 @@ if (BYTES_BIG_ENDIAN)
                     (const_string "neon_load1_1reg<q>")))]
 )
 
+(define_insn "neon_vld4_dupv8bf"
+  [(set (match_operand:XI 0 "s_register_operand" "=w")
+        (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
+                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   UNSPEC_VLD2_DUP))]
+  "TARGET_BF16_SIMD"
+{
+  rtx ops[5];
+  int tabbase = REGNO (operands[0]);
+
+  ops[4] = operands[1];
+  ops[0] = gen_rtx_REG (V4BFmode, tabbase);
+  ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
+  ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
+  ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
+  output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
+  return "";
+}
+  [(set_attr "type" "neon_load4_all_lanes_q")]
+)
+
 (define_expand "vec_store_lanesoi<mode>"
   [(set (match_operand:OI 0 "neon_struct_operand")
 	(unspec:OI [(match_operand:OI 1 "s_register_operand")
gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2020-03-06  Delia Burduv  <delia.burduv@arm.com>
+
+	* gcc.target/arm/simd/bf16_vldn_1.c: New test.
+
 2020-03-06  Delia Burduv  <delia.burduv@arm.com>
 
 	* gcc.target/arm/simd/bf16_vstn_1.c: New test.
gcc/testsuite/gcc.target/arm/simd/bf16_vldn_1.c (new file, 152 lines)
@@ -0,0 +1,152 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps -O2 -mfloat-abi=hard" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+
+/*
+**test_vld2_bf16:
+** ...
+** vld2.16 {d0-d1}, \[r0\]
+** bx lr
+*/
+bfloat16x4x2_t
+test_vld2_bf16 (bfloat16_t * ptr)
+{
+  return vld2_bf16 (ptr);
+}
+
+/*
+**test_vld2q_bf16:
+** ...
+** vld2.16 {d0-d3}, \[r0\]
+** bx lr
+*/
+bfloat16x8x2_t
+test_vld2q_bf16 (bfloat16_t * ptr)
+{
+  return vld2q_bf16 (ptr);
+}
+
+/*
+**test_vld2_dup_bf16:
+** ...
+** vld2.16 {d0\[\], d1\[\]}, \[r0\]
+** bx lr
+*/
+bfloat16x4x2_t
+test_vld2_dup_bf16 (bfloat16_t * ptr)
+{
+  return vld2_dup_bf16 (ptr);
+}
+
+/*
+**test_vld2q_dup_bf16:
+** ...
+** vld2.16 {d0, d1, d2, d3}, \[r0\]
+** bx lr
+*/
+bfloat16x8x2_t
+test_vld2q_dup_bf16 (bfloat16_t * ptr)
+{
+  return vld2q_dup_bf16 (ptr);
+}
+
+/*
+**test_vld3_bf16:
+** ...
+** vld3.16 {d0-d2}, \[r0\]
+** bx lr
+*/
+bfloat16x4x3_t
+test_vld3_bf16 (bfloat16_t * ptr)
+{
+  return vld3_bf16 (ptr);
+}
+
+/*
+**test_vld3q_bf16:
+** ...
+** vld3.16 {d1, d3, d5}, \[r0\]
+** bx lr
+*/
+bfloat16x8x3_t
+test_vld3q_bf16 (bfloat16_t * ptr)
+{
+  return vld3q_bf16 (ptr);
+}
+
+/*
+**test_vld3_dup_bf16:
+** ...
+** vld3.16 {d0\[\], d1\[\], d2\[\]}, \[r0\]
+** bx lr
+*/
+bfloat16x4x3_t
+test_vld3_dup_bf16 (bfloat16_t * ptr)
+{
+  return vld3_dup_bf16 (ptr);
+}
+
+/*
+**test_vld3q_dup_bf16:
+** ...
+** vld3.16 {d0\[\], d1\[\], d2\[\]}, \[r0\]
+** bx lr
+*/
+bfloat16x8x3_t
+test_vld3q_dup_bf16 (bfloat16_t * ptr)
+{
+  return vld3q_dup_bf16 (ptr);
+}
+
+/*
+**test_vld4_bf16:
+** ...
+** vld4.16 {d0-d3}, \[r0\]
+** bx lr
+*/
+bfloat16x4x4_t
+test_vld4_bf16 (bfloat16_t * ptr)
+{
+  return vld4_bf16 (ptr);
+}
+
+/*
+**test_vld4q_bf16:
+** ...
+** vld4.16 {d1, d3, d5, d7}, \[r0\]
+** bx lr
+*/
+bfloat16x8x4_t
+test_vld4q_bf16 (bfloat16_t * ptr)
+{
+  return vld4q_bf16 (ptr);
+}
+
+/*
+**test_vld4_dup_bf16:
+** ...
+** vld4.16 {d0\[\], d1\[\], d2\[\], d3\[\]}, \[r0\]
+** bx lr
+*/
+bfloat16x4x4_t
+test_vld4_dup_bf16 (bfloat16_t * ptr)
+{
+  return vld4_dup_bf16 (ptr);
+}
+
+/*
+**test_vld4q_dup_bf16:
+** ...
+** vld4.16 {d0\[\], d1\[\], d2\[\], d3\[\]}, \[r0\]
+** bx lr
+*/
+bfloat16x8x4_t
+test_vld4q_dup_bf16 (bfloat16_t * ptr)
+{
+  return vld4q_dup_bf16 (ptr);
+}