mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-03 07:10:28 +08:00
[GCC][PATCH][ARM]Add ACLE intrinsics for dot product (vusdot - vector, v<us/su>dot - by element) for AArch32 AdvSIMD ARMv8.6 Extension
This patch adds the ARMv8.6 Extension ACLE intrinsics for dot product operations (vector/by element) to the ARM back-end. These are: usdot (vector), <us/su>dot (by element). The functions are optional from ARMv8.2-a as -march=armv8.2-a+i8mm and for ARM they remain optional after as of ARMv8.6-a. The functions are declared in arm_neon.h, RTL patterns are defined to generate assembler and tests are added to verify and perform adequate checks. Regression testing on arm-none-eabi passed successfully. gcc/ChangeLog: 2020-02-11 Stam Markianos-Wright <stam.markianos-wright@arm.com> * config/arm/arm-builtins.c (enum arm_type_qualifiers): (USTERNOP_QUALIFIERS): New define. (USMAC_LANE_QUADTUP_QUALIFIERS): New define. (SUMAC_LANE_QUADTUP_QUALIFIERS): New define. (arm_expand_builtin_args): Add case ARG_BUILTIN_LANE_QUADTUP_INDEX. (arm_expand_builtin_1): Add qualifier_lane_quadtup_index. * config/arm/arm_neon.h (vusdot_s32): New. (vusdot_lane_s32): New. (vusdotq_lane_s32): New. (vsudot_lane_s32): New. (vsudotq_lane_s32): New. * config/arm/arm_neon_builtins.def (usdot, usdot_lane,sudot_lane): New. * config/arm/iterators.md (DOTPROD_I8MM): New. (sup, opsuffix): Add <us/su>. * config/arm/neon.md (neon_usdot, <us/su>dot_lane: New. * config/arm/unspecs.md (UNSPEC_DOT_US, UNSPEC_DOT_SU): New. gcc/testsuite/ChangeLog: 2020-02-11 Stam Markianos-Wright <stam.markianos-wright@arm.com> * gcc.target/arm/simd/vdot-2-1.c: New test. * gcc.target/arm/simd/vdot-2-2.c: New test. * gcc.target/arm/simd/vdot-2-3.c: New test. * gcc.target/arm/simd/vdot-2-4.c: New test.
This commit is contained in:
parent
667afe5a49
commit
f348846e25
@ -1,3 +1,22 @@
|
||||
2020-02-11 Stam Markianos-Wright <stam.markianos-wright@arm.com>
|
||||
|
||||
* config/arm/arm-builtins.c (enum arm_type_qualifiers):
|
||||
(USTERNOP_QUALIFIERS): New define.
|
||||
(USMAC_LANE_QUADTUP_QUALIFIERS): New define.
|
||||
(SUMAC_LANE_QUADTUP_QUALIFIERS): New define.
|
||||
(arm_expand_builtin_args): Add case ARG_BUILTIN_LANE_QUADTUP_INDEX.
|
||||
(arm_expand_builtin_1): Add qualifier_lane_quadtup_index.
|
||||
* config/arm/arm_neon.h (vusdot_s32): New.
|
||||
(vusdot_lane_s32): New.
|
||||
(vusdotq_lane_s32): New.
|
||||
(vsudot_lane_s32): New.
|
||||
(vsudotq_lane_s32): New.
|
||||
* config/arm/arm_neon_builtins.def (usdot, usdot_lane,sudot_lane): New.
|
||||
* config/arm/iterators.md (DOTPROD_I8MM): New.
|
||||
(sup, opsuffix): Add <us/su>.
|
||||
* config/arm/neon.md (neon_usdot, <us/su>dot_lane: New.
|
||||
* config/arm/unspecs.md (UNSPEC_DOT_US, UNSPEC_DOT_SU): New.
|
||||
|
||||
2020-02-11 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/93661
|
||||
|
@ -86,7 +86,10 @@ enum arm_type_qualifiers
|
||||
qualifier_const_void_pointer = 0x802,
|
||||
/* Lane indices selected in pairs - must be within range of previous
|
||||
argument = a vector. */
|
||||
qualifier_lane_pair_index = 0x1000
|
||||
qualifier_lane_pair_index = 0x1000,
|
||||
/* Lane indices selected in quadtuplets - must be within range of previous
|
||||
argument = a vector. */
|
||||
qualifier_lane_quadtup_index = 0x2000
|
||||
};
|
||||
|
||||
/* The qualifier_internal allows generation of a unary builtin from
|
||||
@ -122,6 +125,13 @@ arm_unsigned_uternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
qualifier_unsigned };
|
||||
#define UTERNOP_QUALIFIERS (arm_unsigned_uternop_qualifiers)
|
||||
|
||||
/* T (T, unsigned T, T). */
|
||||
static enum arm_type_qualifiers
|
||||
arm_usternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_none, qualifier_none, qualifier_unsigned,
|
||||
qualifier_none };
|
||||
#define USTERNOP_QUALIFIERS (arm_usternop_qualifiers)
|
||||
|
||||
/* T (T, immediate). */
|
||||
static enum arm_type_qualifiers
|
||||
arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
@ -176,6 +186,20 @@ arm_umac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
qualifier_unsigned, qualifier_lane_index };
|
||||
#define UMAC_LANE_QUALIFIERS (arm_umac_lane_qualifiers)
|
||||
|
||||
/* T (T, unsigned T, T, lane index). */
|
||||
static enum arm_type_qualifiers
|
||||
arm_usmac_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_none, qualifier_none, qualifier_unsigned,
|
||||
qualifier_none, qualifier_lane_quadtup_index };
|
||||
#define USMAC_LANE_QUADTUP_QUALIFIERS (arm_usmac_lane_quadtup_qualifiers)
|
||||
|
||||
/* T (T, T, unsigend T, lane index). */
|
||||
static enum arm_type_qualifiers
|
||||
arm_sumac_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_none, qualifier_none, qualifier_none,
|
||||
qualifier_unsigned, qualifier_lane_quadtup_index };
|
||||
#define SUMAC_LANE_QUADTUP_QUALIFIERS (arm_sumac_lane_quadtup_qualifiers)
|
||||
|
||||
/* T (T, T, immediate). */
|
||||
static enum arm_type_qualifiers
|
||||
arm_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
@ -2177,6 +2201,7 @@ typedef enum {
|
||||
ARG_BUILTIN_LANE_INDEX,
|
||||
ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX,
|
||||
ARG_BUILTIN_LANE_PAIR_INDEX,
|
||||
ARG_BUILTIN_LANE_QUADTUP_INDEX,
|
||||
ARG_BUILTIN_NEON_MEMORY,
|
||||
ARG_BUILTIN_MEMORY,
|
||||
ARG_BUILTIN_STOP
|
||||
@ -2325,11 +2350,24 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode,
|
||||
if (CONST_INT_P (op[argc]))
|
||||
{
|
||||
machine_mode vmode = mode[argc - 1];
|
||||
neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode) / 2, exp);
|
||||
neon_lane_bounds (op[argc], 0,
|
||||
GET_MODE_NUNITS (vmode) / 2, exp);
|
||||
}
|
||||
/* If the lane index isn't a constant then the next
|
||||
case will error. */
|
||||
/* Fall through. */
|
||||
/* If the lane index isn't a constant then error out. */
|
||||
goto constant_arg;
|
||||
|
||||
case ARG_BUILTIN_LANE_QUADTUP_INDEX:
|
||||
/* Previous argument must be a vector, which this indexes. */
|
||||
gcc_assert (argc > 0);
|
||||
if (CONST_INT_P (op[argc]))
|
||||
{
|
||||
machine_mode vmode = mode[argc - 1];
|
||||
neon_lane_bounds (op[argc], 0,
|
||||
GET_MODE_NUNITS (vmode) / 4, exp);
|
||||
}
|
||||
/* If the lane index isn't a constant then error out. */
|
||||
goto constant_arg;
|
||||
|
||||
case ARG_BUILTIN_CONSTANT:
|
||||
constant_arg:
|
||||
if (!(*insn_data[icode].operand[opno].predicate)
|
||||
@ -2493,6 +2531,8 @@ arm_expand_builtin_1 (int fcode, tree exp, rtx target,
|
||||
args[k] = ARG_BUILTIN_LANE_INDEX;
|
||||
else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
|
||||
args[k] = ARG_BUILTIN_LANE_PAIR_INDEX;
|
||||
else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
|
||||
args[k] = ARG_BUILTIN_LANE_QUADTUP_INDEX;
|
||||
else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
|
||||
args[k] = ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX;
|
||||
else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
|
||||
|
@ -18742,6 +18742,52 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
|
||||
return __builtin_neon_vcmla_lane270v4sf (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
|
||||
/* AdvSIMD Matrix Multiply-Accumulate and Dot Product intrinsics. */
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("arch=armv8.2-a+i8mm")
|
||||
|
||||
__extension__ extern __inline int32x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b)
|
||||
{
|
||||
return __builtin_neon_usdotv8qi_ssus (__r, __a, __b);
|
||||
}
|
||||
|
||||
__extension__ extern __inline int32x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a,
|
||||
int8x8_t __b, const int __index)
|
||||
{
|
||||
return __builtin_neon_usdot_lanev8qi_ssuss (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
__extension__ extern __inline int32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vusdotq_lane_s32 (int32x4_t __r, uint8x16_t __a,
|
||||
int8x8_t __b, const int __index)
|
||||
{
|
||||
return __builtin_neon_usdot_lanev16qi_ssuss (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
__extension__ extern __inline int32x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vsudot_lane_s32 (int32x2_t __r, int8x8_t __a,
|
||||
uint8x8_t __b, const int __index)
|
||||
{
|
||||
return __builtin_neon_sudot_lanev8qi_sssus (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
__extension__ extern __inline int32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a,
|
||||
uint8x8_t __b, const int __index)
|
||||
{
|
||||
return __builtin_neon_sudot_lanev16qi_sssus (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
|
@ -352,6 +352,10 @@ VAR2 (UTERNOP, udot, v8qi, v16qi)
|
||||
VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi)
|
||||
VAR2 (UMAC_LANE, udot_lane, v8qi, v16qi)
|
||||
|
||||
VAR1 (USTERNOP, usdot, v8qi)
|
||||
VAR2 (USMAC_LANE_QUADTUP, usdot_lane, v8qi, v16qi)
|
||||
VAR2 (SUMAC_LANE_QUADTUP, sudot_lane, v8qi, v16qi)
|
||||
|
||||
VAR4 (BINOP, vcadd90, v4hf, v2sf, v8hf, v4sf)
|
||||
VAR4 (BINOP, vcadd270, v4hf, v2sf, v8hf, v4sf)
|
||||
VAR4 (TERNOP, vcmla0, v2sf, v4sf, v4hf, v8hf)
|
||||
|
@ -480,6 +480,8 @@
|
||||
|
||||
(define_int_iterator DOTPROD [UNSPEC_DOT_S UNSPEC_DOT_U])
|
||||
|
||||
(define_int_iterator DOTPROD_I8MM [UNSPEC_DOT_US UNSPEC_DOT_SU])
|
||||
|
||||
(define_int_iterator VFMLHALVES [UNSPEC_VFML_LO UNSPEC_VFML_HI])
|
||||
|
||||
(define_int_iterator VCADD [UNSPEC_VCADD90 UNSPEC_VCADD270])
|
||||
@ -938,6 +940,7 @@
|
||||
(UNSPEC_VRSRA_S_N "s") (UNSPEC_VRSRA_U_N "u")
|
||||
(UNSPEC_VCVTH_S "s") (UNSPEC_VCVTH_U "u")
|
||||
(UNSPEC_DOT_S "s") (UNSPEC_DOT_U "u")
|
||||
(UNSPEC_DOT_US "us") (UNSPEC_DOT_SU "su")
|
||||
(UNSPEC_SSAT16 "s") (UNSPEC_USAT16 "u")
|
||||
])
|
||||
|
||||
@ -1169,6 +1172,9 @@
|
||||
(define_int_attr MRRC [(VUNSPEC_MRRC "MRRC") (VUNSPEC_MRRC2 "MRRC2")])
|
||||
|
||||
(define_int_attr opsuffix [(UNSPEC_DOT_S "s8")
|
||||
(UNSPEC_DOT_U "u8")])
|
||||
(UNSPEC_DOT_U "u8")
|
||||
(UNSPEC_DOT_US "s8")
|
||||
(UNSPEC_DOT_SU "u8")
|
||||
])
|
||||
|
||||
(define_int_attr smlaw_op [(UNSPEC_SMLAWB "smlawb") (UNSPEC_SMLAWT "smlawt")])
|
||||
|
@ -3279,6 +3279,20 @@
|
||||
[(set_attr "type" "neon_dot<q>")]
|
||||
)
|
||||
|
||||
;; These instructions map to the __builtins for the Dot Product operations.
|
||||
(define_insn "neon_usdot<vsi2qi>"
|
||||
[(set (match_operand:VCVTI 0 "register_operand" "=w")
|
||||
(plus:VCVTI
|
||||
(unspec:VCVTI
|
||||
[(match_operand:<VSI2QI> 2 "register_operand" "w")
|
||||
(match_operand:<VSI2QI> 3 "register_operand" "w")]
|
||||
UNSPEC_DOT_US)
|
||||
(match_operand:VCVTI 1 "register_operand" "0")))]
|
||||
"TARGET_I8MM"
|
||||
"vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
|
||||
[(set_attr "type" "neon_dot<q>")]
|
||||
)
|
||||
|
||||
;; These instructions map to the __builtins for the Dot Product
|
||||
;; indexed operations.
|
||||
(define_insn "neon_<sup>dot_lane<vsi2qi>"
|
||||
@ -3298,6 +3312,25 @@
|
||||
[(set_attr "type" "neon_dot<q>")]
|
||||
)
|
||||
|
||||
;; These instructions map to the __builtins for the Dot Product
|
||||
;; indexed operations in the v8.6 I8MM extension.
|
||||
(define_insn "neon_<sup>dot_lane<vsi2qi>"
|
||||
[(set (match_operand:VCVTI 0 "register_operand" "=w")
|
||||
(plus:VCVTI
|
||||
(unspec:VCVTI
|
||||
[(match_operand:<VSI2QI> 2 "register_operand" "w")
|
||||
(match_operand:V8QI 3 "register_operand" "t")
|
||||
(match_operand:SI 4 "immediate_operand" "i")]
|
||||
DOTPROD_I8MM)
|
||||
(match_operand:VCVTI 1 "register_operand" "0")))]
|
||||
"TARGET_I8MM"
|
||||
{
|
||||
operands[4] = GEN_INT (INTVAL (operands[4]));
|
||||
return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
|
||||
}
|
||||
[(set_attr "type" "neon_dot<q>")]
|
||||
)
|
||||
|
||||
;; These expands map to the Dot Product optab the vectorizer checks for.
|
||||
;; The auto-vectorizer expects a dot product builtin that also does an
|
||||
;; accumulation into the provided register.
|
||||
|
@ -493,6 +493,8 @@
|
||||
UNSPEC_VRNDX
|
||||
UNSPEC_DOT_S
|
||||
UNSPEC_DOT_U
|
||||
UNSPEC_DOT_US
|
||||
UNSPEC_DOT_SU
|
||||
UNSPEC_VFML_LO
|
||||
UNSPEC_VFML_HI
|
||||
UNSPEC_VCADD90
|
||||
|
@ -1,3 +1,10 @@
|
||||
2020-02-11 Stam Markianos-Wright <stam.markianos-wright@arm.com>
|
||||
|
||||
* gcc.target/arm/simd/vdot-2-1.c: New test.
|
||||
* gcc.target/arm/simd/vdot-2-2.c: New test.
|
||||
* gcc.target/arm/simd/vdot-2-3.c: New test.
|
||||
* gcc.target/arm/simd/vdot-2-4.c: New test.
|
||||
|
||||
2020-02-11 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/93661
|
||||
|
91
gcc/testsuite/gcc.target/arm/simd/vdot-2-1.c
Normal file
91
gcc/testsuite/gcc.target/arm/simd/vdot-2-1.c
Normal file
@ -0,0 +1,91 @@
|
||||
/* { dg-do assemble { target { arm*-*-* } } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
|
||||
/* { dg-add-options arm_v8_2a_i8mm } */
|
||||
/* { dg-additional-options "-O -save-temps" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Unsigned-Signed Dot Product instructions. */
|
||||
|
||||
/*
|
||||
**usfoo:
|
||||
** ...
|
||||
** vusdot\.s8 d0, d1, d2
|
||||
** bx lr
|
||||
*/
|
||||
int32x2_t usfoo (int32x2_t r, uint8x8_t x, int8x8_t y)
|
||||
{
|
||||
return vusdot_s32 (r, x, y);
|
||||
}
|
||||
|
||||
/*
|
||||
**usfoo_lane:
|
||||
** ...
|
||||
** vusdot\.s8 d0, d1, d2\[0\]
|
||||
** bx lr
|
||||
*/
|
||||
int32x2_t usfoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
|
||||
{
|
||||
return vusdot_lane_s32 (r, x, y, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
**usfooq_lane:
|
||||
** ...
|
||||
** vusdot\.s8 q0, q1, d4\[1\]
|
||||
** bx lr
|
||||
*/
|
||||
int32x4_t usfooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
|
||||
{
|
||||
return vusdotq_lane_s32 (r, x, y, 1);
|
||||
}
|
||||
|
||||
/* Signed-Unsigned Dot Product instructions. */
|
||||
|
||||
/*
|
||||
**sfoo_lane:
|
||||
** ...
|
||||
** vsudot\.u8 d0, d1, d2\[0\]
|
||||
** bx lr
|
||||
*/
|
||||
int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
|
||||
{
|
||||
return vsudot_lane_s32 (r, x, y, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
**sfooq_lane:
|
||||
** ...
|
||||
** vsudot\.u8 q0, q1, d4\[1\]
|
||||
** bx lr
|
||||
*/
|
||||
int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
|
||||
{
|
||||
return vsudotq_lane_s32 (r, x, y, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
**usfoo_untied:
|
||||
** ...
|
||||
** vusdot\.s8 d1, d2, d3
|
||||
** vmov d0, d1 @ v2si
|
||||
** bx lr
|
||||
*/
|
||||
int32x2_t usfoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
|
||||
{
|
||||
return vusdot_s32 (r, x, y);
|
||||
}
|
||||
|
||||
/*
|
||||
**usfoo_lane_untied:
|
||||
** ...
|
||||
** vusdot.s8 d1, d2, d3\[0\]
|
||||
** vmov d0, d1 @ v2si
|
||||
** bx lr
|
||||
*/
|
||||
int32x2_t usfoo_lane_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
|
||||
{
|
||||
return vusdot_lane_s32 (r, x, y, 0);
|
||||
}
|
||||
|
90
gcc/testsuite/gcc.target/arm/simd/vdot-2-2.c
Normal file
90
gcc/testsuite/gcc.target/arm/simd/vdot-2-2.c
Normal file
@ -0,0 +1,90 @@
|
||||
/* { dg-do assemble { target { arm*-*-* } } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
|
||||
/* { dg-add-options arm_v8_2a_i8mm } */
|
||||
/* { dg-additional-options "-O -save-temps -mbig-endian" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Unsigned-Signed Dot Product instructions. */
|
||||
|
||||
/*
|
||||
**usfoo:
|
||||
** ...
|
||||
** vusdot\.s8 d0, d1, d2
|
||||
** bx lr
|
||||
*/
|
||||
int32x2_t usfoo (int32x2_t r, uint8x8_t x, int8x8_t y)
|
||||
{
|
||||
return vusdot_s32 (r, x, y);
|
||||
}
|
||||
|
||||
/*
|
||||
**usfoo_lane:
|
||||
** ...
|
||||
** vusdot\.s8 d0, d1, d2\[0\]
|
||||
** bx lr
|
||||
*/
|
||||
int32x2_t usfoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
|
||||
{
|
||||
return vusdot_lane_s32 (r, x, y, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
**usfooq_lane:
|
||||
** ...
|
||||
** vusdot\.s8 q0, q1, d4\[1\]
|
||||
** bx lr
|
||||
*/
|
||||
int32x4_t usfooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
|
||||
{
|
||||
return vusdotq_lane_s32 (r, x, y, 1);
|
||||
}
|
||||
|
||||
/* Signed-Unsigned Dot Product instructions. */
|
||||
|
||||
/*
|
||||
**sfoo_lane:
|
||||
** ...
|
||||
** vsudot\.u8 d0, d1, d2\[0\]
|
||||
** bx lr
|
||||
*/
|
||||
int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
|
||||
{
|
||||
return vsudot_lane_s32 (r, x, y, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
**sfooq_lane:
|
||||
** ...
|
||||
** vsudot\.u8 q0, q1, d4\[1\]
|
||||
** bx lr
|
||||
*/
|
||||
int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
|
||||
{
|
||||
return vsudotq_lane_s32 (r, x, y, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
**usfoo_untied:
|
||||
** ...
|
||||
** vusdot\.s8 d1, d2, d3
|
||||
** vmov d0, d1 @ v2si
|
||||
** bx lr
|
||||
*/
|
||||
int32x2_t usfoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
|
||||
{
|
||||
return vusdot_s32 (r, x, y);
|
||||
}
|
||||
|
||||
/*
|
||||
**usfoo_lane_untied:
|
||||
** ...
|
||||
** vusdot.s8 d1, d2, d3\[0\]
|
||||
** vmov d0, d1 @ v2si
|
||||
** bx lr
|
||||
*/
|
||||
int32x2_t usfoo_lane_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
|
||||
{
|
||||
return vusdot_lane_s32 (r, x, y, 0);
|
||||
}
|
21
gcc/testsuite/gcc.target/arm/simd/vdot-2-3.c
Normal file
21
gcc/testsuite/gcc.target/arm/simd/vdot-2-3.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-do assemble { target { arm*-*-* } } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
|
||||
/* { dg-add-options arm_v8_2a_i8mm } */
|
||||
/* { dg-additional-options "--save-temps" } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Unsigned-Signed Dot Product instructions. */
|
||||
|
||||
int32x2_t usfoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
|
||||
{
|
||||
/* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
|
||||
return vusdot_lane_s32 (r, x, y, -1);
|
||||
}
|
||||
|
||||
|
||||
int32x4_t usfooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
|
||||
{
|
||||
/* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
|
||||
return vusdotq_lane_s32 (r, x, y, 2);
|
||||
}
|
20
gcc/testsuite/gcc.target/arm/simd/vdot-2-4.c
Normal file
20
gcc/testsuite/gcc.target/arm/simd/vdot-2-4.c
Normal file
@ -0,0 +1,20 @@
|
||||
/* { dg-do assemble { target { arm*-*-* } } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
|
||||
/* { dg-add-options arm_v8_2a_i8mm } */
|
||||
/* { dg-additional-options "--save-temps" } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Signed-Unsigned Dot Product instructions. */
|
||||
|
||||
int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
|
||||
{
|
||||
/* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
|
||||
return vsudot_lane_s32 (r, x, y, -1);
|
||||
}
|
||||
|
||||
int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
|
||||
{
|
||||
/* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
|
||||
return vsudotq_lane_s32 (r, x, y, 2);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user