[Arm] Implement CDE intrinsics for MVE registers.

Implement CDE intrinsics on MVE registers.

Other than the basics required for adding intrinsics this patch consists
of three changes.

** We separate out the MVE types and casts from the arm_mve.h header.

This is so that the types can be used in arm_cde.h without the need to include
the entire arm_mve.h header.
The only type that arm_cde.h needs is `uint8x16_t`, so this separation could be
avoided by using a `typedef` in this file.
Since the introduced intrinsics are all defined to act on the full range of MVE
types, declaring all such types seems intuitive since it will provide their
declaration to the user too.

This arm_mve_types.h header not only includes the MVE types, but also
the conversion intrinsics between them.
Some of the conversion intrinsics are needed for arm_cde.h, but most are
not.  We include all conversion intrinsics to keep the definition of
such conversion functions all in one place, on the understanding that
extra conversion functions being defined when including `arm_cde.h` is
not a problem.

** We define the TARGET_RESOLVE_OVERLOADED_BUILTIN hook for the Arm backend.

This is needed to implement the polymorphism for the required intrinsics.
The intrinsics have no specialised version, and the resulting assembly
instruction for all different types should be exactly the same.
Due to this we have implemented these intrinsics via one builtin on one type.
All other calls to the intrinsic with different types are implicitly cast to
the one type that is defined, and hence are all expanded to the same RTL
pattern that is only defined for one machine mode.

** We seperate the initialisation of the CDE intrinsics from others.

This allows us to ensure that the CDE intrinsics acting on MVE registers
are only created when both CDE and MVE are available.
Only initialising these builtins when both features are available is
especially important since they require a type that is only initialised
when the target supports hard float.  Hence trying to initialise these
builtins on a soft float target would cause an ICE.

Testing done:
  Full bootstrap and regtest on arm-none-linux-gnueabihf
  Regression test on arm-none-eabi

Ok for trunk?

gcc/ChangeLog:

2020-03-10  Matthew Malcomson  <matthew.malcomson@arm.com>

	* config.gcc (arm_mve_types.h): New extra_header for arm.
	* config/arm/arm-builtins.c (arm_resolve_overloaded_builtin): New.
	(arm_init_cde_builtins): New.
	(arm_init_acle_builtins): Remove initialisation of CDE builtins.
	(arm_init_builtins): Call arm_init_cde_builtins when target
	supports CDE.
	* config/arm/arm-c.c (arm_resolve_overloaded_builtin): New declaration.
	(arm_register_target_pragmas): Initialise resolve_overloaded_builtin
	hook to the implementation for the arm backend.
	* config/arm/arm.h (ARM_MVE_CDE_CONST_1): New.
	(ARM_MVE_CDE_CONST_2): New.
	(ARM_MVE_CDE_CONST_3): New.
	* config/arm/arm_cde.h (__arm_vcx1q_u8): New.
	(__arm_vcx1qa): New.
	(__arm_vcx2q): New.
	(__arm_vcx2q_u8): New.
	(__arm_vcx2qa): New.
	(__arm_vcx3q): New.
	(__arm_vcx3q_u8): New.
	(__arm_vcx3qa): New.
	* config/arm/arm_cde_builtins.def (vcx1q, vcx1qa, vcx2q, vcx2qa, vcx3q,
	vcx3qa): New builtins defined.
	* config/arm/arm_mve.h: Move typedefs and conversion intrinsics
	to arm_mve_types.h header.
	* config/arm/arm_mve_types.h: New file.
	* config/arm/mve.md (arm_vcx1qv16qi, arm_vcx1qav16qi, arm_vcx2qv16qi,
	arm_vcx2qav16qi, arm_vcx3qv16qi, arm_vcx3qav16qi): New patterns.
	* config/arm/predicates.md (const_int_mve_cde1_operand,
	const_int_mve_cde2_operand, const_int_mve_cde3_operand): New.

gcc/testsuite/ChangeLog:

2020-03-23  Matthew Malcomson  <matthew.malcomson@arm.com>
	    Dennis Zhang  <dennis.zhang@arm.com>

	* gcc.target/arm/acle/cde-mve-error-1.c: New test.
	* gcc.target/arm/acle/cde-mve-error-2.c: New test.
	* gcc.target/arm/acle/cde-mve-error-3.c: New test.
	* gcc.target/arm/acle/cde-mve-full-assembly.c: New test.
	* gcc.target/arm/acle/cde-mve-tests.c: New test.
	* lib/target-supports.exp (arm_v8_1m_main_cde_mve_fp): New check
	effective.
	(arm_v8_1m_main_cde_mve, arm_v8m_main_cde_fp): Use -mfpu=auto
	so we only check configurations that make sense.
This commit is contained in:
Matthew Malcomson 2020-04-08 16:06:47 +01:00
parent a5f3c89e1b
commit 78bf916376
16 changed files with 3070 additions and 1302 deletions

View File

@ -346,7 +346,7 @@ arc*-*-*)
arm*-*-*)
cpu_type=arm
extra_objs="arm-builtins.o aarch-common.o"
extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h arm_cmse.h arm_bf16.h arm_mve.h arm_cde.h"
extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h arm_cmse.h arm_bf16.h arm_mve_types.h arm_mve.h arm_cde.h"
target_type_format_char='%'
c_target_objs="arm-c.o"
cxx_target_objs="arm-c.o"

View File

@ -1833,10 +1833,22 @@ arm_init_acle_builtins (void)
arm_builtin_datum *d = &acle_builtin_data[i];
arm_init_builtin (fcode, d, "__builtin_arm");
}
}
fcode = ARM_BUILTIN_CDE_PATTERN_START;
static void
arm_init_cde_builtins (void)
{
unsigned int i, fcode = ARM_BUILTIN_CDE_PATTERN_START;
for (i = 0; i < ARRAY_SIZE (cde_builtin_data); i++, fcode++)
{
/* Only define CDE floating point builtins if the target has floating
point registers. NOTE: without HARD_FLOAT we don't have MVE, so we
can break out of this loop directly here. */
if (!TARGET_MAYBE_HARD_FLOAT && fcode >= ARM_BUILTIN_vcx1si)
break;
/* Only define CDE/MVE builtins if MVE is available. */
if (!TARGET_HAVE_MVE && fcode >= ARM_BUILTIN_vcx1qv16qi)
break;
arm_builtin_cde_datum *cde = &cde_builtin_data[i];
arm_builtin_datum *d = &cde->base;
arm_init_builtin (fcode, d, "__builtin_arm");
@ -2628,6 +2640,9 @@ arm_init_builtins (void)
arm_init_crypto_builtins ();
}
if (TARGET_CDE)
arm_init_cde_builtins ();
arm_init_acle_builtins ();
if (TARGET_MAYBE_HARD_FLOAT)
@ -4178,4 +4193,90 @@ arm_check_builtin_call (location_t , vec<location_t> , tree fndecl,
return true;
}
/* Implement TARGET_RESOLVE_OVERLOADED_BUILTIN. This is currently only
used for the MVE related builtins for the CDE extension.
Here we ensure the type of arguments is such that the size is correct, and
then return a tree that describes the same function call but with the
relevant types cast as necessary. */
tree
arm_resolve_overloaded_builtin (location_t loc, tree fndecl, void *arglist)
{
if (DECL_MD_FUNCTION_CODE (fndecl) <= ARM_BUILTIN_vcx1qv16qi
|| DECL_MD_FUNCTION_CODE (fndecl) >= ARM_BUILTIN_MVE_BASE)
return NULL_TREE;
vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (arglist);
unsigned param_num = params ? params->length() : 0;
unsigned num_args = list_length (TYPE_ARG_TYPES (TREE_TYPE (fndecl))) - 1;
/* Ensure this function has the correct number of arguments.
This won't happen when using the intrinsics defined by the ACLE, since
they're exposed to the user via a wrapper in the arm_cde.h header that has
the correct number of arguments ... hence the compiler would already catch
an incorrect number of arguments there.
It is still possible to get here if the user tries to call the __bulitin_*
functions directly. We could print some error message in this function,
but instead we leave it to the rest of the code to catch this problem in
the same way that other __builtin_* functions catch it.
This does mean an odd error message, but it's consistent with the rest of
the builtins. */
if (param_num != num_args)
return NULL_TREE;
tree to_return = NULL_TREE;
/* Take the functions return type since that's the same type as the arguments
this function needs (the types of the builtin function all come from the
machine mode of the RTL pattern, and they're all the same and calculated
in the same way). */
tree pattern_type = TREE_TYPE (TREE_TYPE (fndecl));
unsigned i;
for (i = 1; i < (param_num - 1); i++)
{
tree this_param = (*params)[i];
if (TREE_CODE (this_param) == ERROR_MARK)
return NULL_TREE;
tree param_type = TREE_TYPE (this_param);
/* Return value is cast to type that second argument originally was.
All non-constant arguments are cast to the return type calculated from
the RTL pattern.
Set the return type to an unqualified version of the type of the first
parameter. The first parameter since that is how the intrinsics are
defined -- to always return the same type as the first polymorphic
argument. Unqualified version of the type since we don't want passing
a constant parameter to mean that the return value of the builtin is
also constant. */
if (i == 1)
to_return = build_qualified_type (param_type, 0 MEM_STAT_INFO);
/* The only requirement of these intrinsics on the type of the variable
is that it's 128 bits wide. All other types are valid and we simply
VIEW_CONVERT_EXPR them to the type of the underlying builtin. */
tree type_size = TYPE_SIZE (param_type);
if (! tree_fits_shwi_p (type_size)
|| tree_to_shwi (type_size) != 128)
{
error_at (loc,
"argument %u to function %qE is of type %qT which is not "
"known to be 128 bits wide",
i, fndecl, param_type);
return NULL_TREE;
}
/* Only convert the argument if we actually need to. */
if (! check_base_type (pattern_type, param_type))
(*params)[i] = build1 (VIEW_CONVERT_EXPR, pattern_type, this_param);
}
tree call_expr = build_call_expr_loc_array (loc, fndecl, param_num,
params->address());
gcc_assert (to_return != NULL_TREE);
if (! check_base_type (to_return, pattern_type))
return build1 (VIEW_CONVERT_EXPR, to_return, call_expr);
return call_expr;
}
#include "gt-arm-builtins.h"

View File

@ -28,6 +28,8 @@
#include "c-family/c-pragma.h"
#include "stringpool.h"
tree arm_resolve_overloaded_builtin (location_t, tree, void*);
/* Output C specific EABI object attributes. These cannot be done in
arm.c because they require information from the C frontend. */
@ -360,6 +362,7 @@ arm_register_target_pragmas (void)
{
/* Update pragma hook to allow parsing #pragma GCC target. */
targetm.target_option.pragma_parse = arm_pragma_target_parse;
targetm.resolve_overloaded_builtin = arm_resolve_overloaded_builtin;
#ifdef REGISTER_SUBTARGET_PRAGMAS
REGISTER_SUBTARGET_PRAGMAS ();

View File

@ -582,6 +582,9 @@ extern const int arm_arch_cde_coproc_bits[];
#define ARM_VCDE_CONST_1 ((1 << 11) - 1)
#define ARM_VCDE_CONST_2 ((1 << 6 ) - 1)
#define ARM_VCDE_CONST_3 ((1 << 3 ) - 1)
#define ARM_MVE_CDE_CONST_1 ((1 << 12) - 1)
#define ARM_MVE_CDE_CONST_2 ((1 << 7 ) - 1)
#define ARM_MVE_CDE_CONST_3 ((1 << 4 ) - 1)
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_APCS_FRAME)

View File

@ -140,6 +140,28 @@ extern "C" {
#endif /* __ARM_FP || __ARM_FEATURE_MVE. */
#endif /* __ARM_FEATURE_CDE. */
#if __ARM_FEATURE_MVE
#include "arm_mve_types.h"
#define __arm_vcx1q_u8(coproc, imm) \
(uint8x16_t)__builtin_arm_vcx1qv16qi(coproc, imm)
#define __arm_vcx1qa(coproc, acc, imm) \
__builtin_arm_vcx1qav16qi(coproc, acc, imm)
#define __arm_vcx2q(coproc, n, imm) \
__builtin_arm_vcx2qv16qi(coproc, n, imm)
#define __arm_vcx2q_u8(coproc, n, imm) \
(uint8x16_t)__builtin_arm_vcx2qv16qi(coproc, n, imm)
#define __arm_vcx2qa(coproc, acc, n, imm) \
__builtin_arm_vcx2qav16qi(coproc, acc, n, imm)
#define __arm_vcx3q(coproc, n, m, imm) \
__builtin_arm_vcx3qv16qi(coproc, n, m, imm)
#define __arm_vcx3q_u8(coproc, n, m, imm) \
(uint8x16_t)__builtin_arm_vcx3qv16qi(coproc, n, m, imm)
#define __arm_vcx3qa(coproc, acc, n, m, imm) \
__builtin_arm_vcx3qav16qi(coproc, acc, n, m, imm)
#endif
#ifdef __cplusplus
}
#endif

View File

@ -37,4 +37,15 @@ CDE_VAR2 (CX_BINARY, vcx2a, si, di, ARM_VCDE_CONST_2, ECF_CONST)
CDE_VAR2 (CX_BINARY, vcx3, si, di, ARM_VCDE_CONST_3, ECF_CONST)
CDE_VAR2 (CX_TERNARY, vcx3a, si, di, ARM_VCDE_CONST_3, ECF_CONST)
/* NOTE: The MVE intrinsics must be defined at the end of this file, and with
vcx1q first.
These restrictions are relied on to determine which intrinsics need
overload resolution in `arm_resolve_overloaded_builtin`. */
VAR1 (CX_IMM, vcx1q, v16qi, ARM_MVE_CDE_CONST_1, ECF_CONST)
VAR1 (CX_UNARY, vcx1qa, v16qi, ARM_MVE_CDE_CONST_1, ECF_CONST)
VAR1 (CX_UNARY, vcx2q, v16qi, ARM_MVE_CDE_CONST_2, ECF_CONST)
VAR1 (CX_BINARY, vcx2qa, v16qi, ARM_MVE_CDE_CONST_2, ECF_CONST)
VAR1 (CX_BINARY, vcx3q, v16qi, ARM_MVE_CDE_CONST_3, ECF_CONST)
VAR1 (CX_TERNARY, vcx3qa, v16qi, ARM_MVE_CDE_CONST_3, ECF_CONST)
#undef CDE_VAR2

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -11301,3 +11301,74 @@
"vpst\;vshlct\t%q0, %1, %4"
[(set_attr "type" "mve_move")
(set_attr "length" "8")])
;; CDE instructions on MVE registers.
(define_insn "arm_vcx1qv16qi"
[(set (match_operand:V16QI 0 "register_operand" "=t")
(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
(match_operand:SI 2 "const_int_mve_cde1_operand" "i")]
UNSPEC_VCDE))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx1\\tp%c1, %q0, #%c2"
[(set_attr "type" "coproc")]
)
(define_insn "arm_vcx1qav16qi"
[(set (match_operand:V16QI 0 "register_operand" "=t")
(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
(match_operand:V16QI 2 "register_operand" "0")
(match_operand:SI 3 "const_int_mve_cde1_operand" "i")]
UNSPEC_VCDEA))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx1a\\tp%c1, %q0, #%c3"
[(set_attr "type" "coproc")]
)
(define_insn "arm_vcx2qv16qi"
[(set (match_operand:V16QI 0 "register_operand" "=t")
(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
(match_operand:V16QI 2 "register_operand" "t")
(match_operand:SI 3 "const_int_mve_cde2_operand" "i")]
UNSPEC_VCDE))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx2\\tp%c1, %q0, %q2, #%c3"
[(set_attr "type" "coproc")]
)
(define_insn "arm_vcx2qav16qi"
[(set (match_operand:V16QI 0 "register_operand" "=t")
(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
(match_operand:V16QI 2 "register_operand" "0")
(match_operand:V16QI 3 "register_operand" "t")
(match_operand:SI 4 "const_int_mve_cde2_operand" "i")]
UNSPEC_VCDEA))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx2a\\tp%c1, %q0, %q3, #%c4"
[(set_attr "type" "coproc")]
)
(define_insn "arm_vcx3qv16qi"
[(set (match_operand:V16QI 0 "register_operand" "=t")
(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
(match_operand:V16QI 2 "register_operand" "t")
(match_operand:V16QI 3 "register_operand" "t")
(match_operand:SI 4 "const_int_mve_cde3_operand" "i")]
UNSPEC_VCDE))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx3\\tp%c1, %q0, %q2, %q3, #%c4"
[(set_attr "type" "coproc")]
)
(define_insn "arm_vcx3qav16qi"
[(set (match_operand:V16QI 0 "register_operand" "=t")
(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
(match_operand:V16QI 2 "register_operand" "0")
(match_operand:V16QI 3 "register_operand" "t")
(match_operand:V16QI 4 "register_operand" "t")
(match_operand:SI 5 "const_int_mve_cde3_operand" "i")]
UNSPEC_VCDEA))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx3a\\tp%c1, %q0, %q3, %q4, #%c5"
[(set_attr "type" "coproc")]
)

View File

@ -255,6 +255,18 @@
(and (match_operand 0 "const_int_operand")
(match_test "IN_RANGE (UINTVAL (op), 0, ARM_VCDE_CONST_3)")))
(define_predicate "const_int_mve_cde1_operand"
(and (match_operand 0 "const_int_operand")
(match_test "IN_RANGE (UINTVAL (op), 0, ARM_MVE_CDE_CONST_1)")))
(define_predicate "const_int_mve_cde2_operand"
(and (match_operand 0 "const_int_operand")
(match_test "IN_RANGE (UINTVAL (op), 0, ARM_MVE_CDE_CONST_2)")))
(define_predicate "const_int_mve_cde3_operand"
(and (match_operand 0 "const_int_operand")
(match_test "IN_RANGE (UINTVAL (op), 0, ARM_MVE_CDE_CONST_3)")))
;; This doesn't have to do much because the constant is already checked
;; in the shift_operator predicate.
(define_predicate "shift_amount_operand"

View File

@ -0,0 +1,42 @@
#include "arm_cde.h"
/* { dg-do assemble } */
/* { dg-require-effective-target arm_v8_1m_main_cde_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_main_cde_mve_fp } */
/* Ensure the error messages make sense when passing too many/too few arguments
to the intrinsic user-facing functions. */
uint8x16_t test_invalid_arguments (uint8x16_t n, uint8x16_t m)
{
uint8x16_t accum = __arm_vcx1q_u8 (0, 33, 1); /* { dg-error {macro "__arm_vcx1q_u8" passed 3 arguments, but takes just 2} } */
accum += __arm_vcx1qa (0, accum, 33, 1); /* { dg-error {macro "__arm_vcx1qa" passed 4 arguments, but takes just 3} } */
accum += __arm_vcx2q_u8 (0, n, 33, 1); /* { dg-error {macro "__arm_vcx2q_u8" passed 4 arguments, but takes just 3} } */
accum += __arm_vcx2q (0, n, 33, 1); /* { dg-error {macro "__arm_vcx2q" passed 4 arguments, but takes just 3} } */
accum += __arm_vcx2qa (0, accum, n, 33, 1); /* { dg-error {macro "__arm_vcx2qa" passed 5 arguments, but takes just 4} } */
accum += __arm_vcx3q_u8 (0, n, m, 33, 1); /* { dg-error {macro "__arm_vcx3q_u8" passed 5 arguments, but takes just 4} } */
accum += __arm_vcx3q (0, n, m, 33, 1); /* { dg-error {macro "__arm_vcx3q" passed 5 arguments, but takes just 4} } */
accum += __arm_vcx3qa (0, accum, n, m, 33, 1); /* { dg-error {macro "__arm_vcx3qa" passed 6 arguments, but takes just 5} } */
accum += __arm_vcx1q_u8 (0); /* { dg-error {macro "__arm_vcx1q_u8" requires 2 arguments, but only 1 given} } */
accum += __arm_vcx1qa (0, accum); /* { dg-error {macro "__arm_vcx1qa" requires 3 arguments, but only 2 given} } */
accum += __arm_vcx2q_u8 (0, n); /* { dg-error {macro "__arm_vcx2q_u8" requires 3 arguments, but only 2 given} } */
accum += __arm_vcx2q (0, n); /* { dg-error {macro "__arm_vcx2q" requires 3 arguments, but only 2 given} } */
accum += __arm_vcx2qa (0, accum, n); /* { dg-error {macro "__arm_vcx2qa" requires 4 arguments, but only 3 given} } */
accum += __arm_vcx3q_u8 (0, n, m); /* { dg-error {macro "__arm_vcx3q_u8" requires 4 arguments, but only 3 given} } */
accum += __arm_vcx3q (0, n, m); /* { dg-error {macro "__arm_vcx3q" requires 4 arguments, but only 3 given} } */
accum += __arm_vcx3qa (0, accum, n, m); /* { dg-error {macro "__arm_vcx3qa" requires 5 arguments, but only 4 given} } */
/* The preprocessor complains that the macro was given an invalid number of
arguments, and because of that ends up not expanding the macro but
rather just leaving the macro name in the source code. That macro name
results in these errors. */
/* { dg-error {'__arm_vcx1q_u8' undeclared \(first use in this function\)} "" { target { *-*-* } } 11 } */
/* { dg-error {'__arm_vcx1qa' undeclared \(first use in this function\)} "" { target { *-*-* } } 12 } */
/* { dg-error {'__arm_vcx2q_u8' undeclared \(first use in this function\)} "" { target { *-*-* } } 13 } */
/* { dg-error {'__arm_vcx2q' undeclared \(first use in this function\)} "" { target { *-*-* } } 14 } */
/* { dg-error {'__arm_vcx2qa' undeclared \(first use in this function\)} "" { target { *-*-* } } 15 } */
/* { dg-error {'__arm_vcx3q_u8' undeclared \(first use in this function\)} "" { target { *-*-* } } 16 } */
/* { dg-error {'__arm_vcx3q' undeclared \(first use in this function\)} "" { target { *-*-* } } 17 } */
/* { dg-error {'__arm_vcx3qa' undeclared \(first use in this function\)} "" { target { *-*-* } } 18 } */
return accum;
}

View File

@ -0,0 +1,103 @@
#include "arm_cde.h"
/* { dg-do assemble } */
/* { dg-require-effective-target arm_v8_1m_main_cde_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_main_cde_mve_fp } */
/* This file and cde-mve-error-tests.c are split since there are two kinds of
errors happening here. The errors in the other file cause the compiler to
not reach the errors found here, hence they need to be in a different file
so we can inspect these ones. */
uint8x16_t test_bad_immediates (uint8x16_t n, uint8x16_t m, int someval)
{
uint8x16_t accum = (uint8x16_t)(uint32x4_t){0, 0, 0, 0};
/* We always different constants for the pairs (__arm_vcx2q and
__arm_vcx2q_u8) and (__arm_vcx3q and __arm_vcx3q_u8) despite them mapping
to the same builtin and us wanting to test the same thing in each block.
This is because we have told the compiler that these functions are
constant and pure (i.e. produce a value solely based on their arguments
and have no side-effects).
With that information the compiler eliminates duplicate calls to the
functions, and we only get error messages for one of the pairs.
Hence, in order to get error messages for both function calls, we use
different constants. */
/* `coproc' not enabled. */
accum += __arm_vcx1q_u8 (1, 4095); /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
accum += __arm_vcx1qa (1, accum, 4095); /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
accum += __arm_vcx2q (1, n, 126); /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
accum += __arm_vcx2q_u8 (1, n, 127); /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
accum += __arm_vcx2qa (1, accum, n, 127); /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
accum += __arm_vcx3q_u8 (1, n, m, 14); /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
accum += __arm_vcx3q (1, n, m, 15); /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
accum += __arm_vcx3qa (1, accum, n, m, 15); /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
/* `coproc' out of range. */
accum += __arm_vcx1q_u8 (8, 4095); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx1qa (8, accum, 4095); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx2q (8, n, 126); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx2q_u8 (8, n, 127); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx2qa (8, accum, n, 127); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx3q_u8 (8, n, m, 14); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx3q (8, n, m, 15); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx3qa (8, accum, n, m, 15); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
/* `imm' out of range. */
accum += __arm_vcx1q_u8 (0, 4096); /* { dg-error {argument 2 must be a constant immediate in range \[0-4095\]} } */
accum += __arm_vcx1qa (0, accum, 4096); /* { dg-error {argument 3 must be a constant immediate in range \[0-4095\]} } */
accum += __arm_vcx2q (0, n, 128); /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
accum += __arm_vcx2q_u8 (0, n, 129); /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
accum += __arm_vcx2qa (0, accum, n, 128); /* { dg-error {argument 4 must be a constant immediate in range \[0-127\]} } */
accum += __arm_vcx3q_u8 (0, n, m, 16); /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
accum += __arm_vcx3q (0, n, m, 17); /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
accum += __arm_vcx3qa (0, accum, n, m, 16); /* { dg-error {argument 5 must be a constant immediate in range \[0-15\]} } */
/* `imm' is not an immediate. */
accum += __arm_vcx1q_u8 (0, someval); /* { dg-error {argument 2 must be a constant immediate in range \[0-4095\]} } */
accum += __arm_vcx1qa (0, accum, someval); /* { dg-error {argument 3 must be a constant immediate in range \[0-4095\]} } */
accum += __arm_vcx2q (0, n, someval); /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
accum += __arm_vcx2q_u8 (6, n, someval); /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
accum += __arm_vcx2qa (0, accum, n, someval); /* { dg-error {argument 4 must be a constant immediate in range \[0-127\]} } */
accum += __arm_vcx3q_u8 (0, n, m, someval); /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
accum += __arm_vcx3q (6, n, m, someval); /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
accum += __arm_vcx3qa (0, accum, n, m, someval); /* { dg-error {argument 5 must be a constant immediate in range \[0-15\]} } */
/* `coproc' is not an immediate. */
accum += __arm_vcx1q_u8 (someval, 4095); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx1qa (someval, accum, 4095); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx2q (someval, n, 126); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx2q_u8 (someval, n, 127); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx2qa (someval, accum, n, 127); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx3q_u8 (someval, n, m, 14); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx3q (someval, n, m, 15); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx3qa (someval, accum, n, m, 15); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
/* `imm' is of wrong type. */
accum += __arm_vcx1q_u8 (0, ""); /* { dg-error {argument 2 must be a constant immediate in range \[0-4095\]} } */
accum += __arm_vcx1qa (0, accum, ""); /* { dg-error {argument 3 must be a constant immediate in range \[0-4095\]} } */
accum += __arm_vcx2q (0, n, ""); /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
accum += __arm_vcx2q_u8 (0, n, "x"); /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
accum += __arm_vcx2qa (0, accum, n, ""); /* { dg-error {argument 4 must be a constant immediate in range \[0-127\]} } */
accum += __arm_vcx3q_u8 (0, n, m, ""); /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
accum += __arm_vcx3q (0, n, m, "x"); /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
accum += __arm_vcx3qa (0, accum, n, m, ""); /* { dg-error {argument 5 must be a constant immediate in range \[0-15\]} } */
/* `coproc' is of wrong type. */
accum += __arm_vcx1q_u8 ("", 4095); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx1qa ("", accum, 4095); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx2q ("", n, 126); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx2q_u8 ("", n, 127); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx2qa ("", accum, n, 127); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx3q_u8 ("", n, m, 14); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx3q ("", n, m, 15); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
accum += __arm_vcx3qa ("", accum, n, m, 15); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
/* { dg-warning {passing argument 2 of '__builtin_arm_vcx1qv16qi' makes integer from pointer without a cast \[-Wint-conversion\]} "" { target *-*-* } 80 } */
/* { dg-warning {passing argument 1 of '__builtin_arm_vcx1qv16qi' makes integer from pointer without a cast \[-Wint-conversion\]} "" { target *-*-* } 90 } */
return accum;
}

View File

@ -0,0 +1,85 @@
#include "arm_cde.h"
/* { dg-do assemble } */
/* { dg-require-effective-target arm_v8_1m_main_cde_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_main_cde_mve_fp } */
uint32_t test (int8x16_t m, int8x16_t n)
{
/* Bad types for polymophic arguments. */
uint32_t accum = 0, n_int = 0;
accum += __arm_vcx1qa (0, accum, 4095);
accum += __arm_vcx2q (0, n_int, 126);
accum += __arm_vcx2q_u8 (0, n_int, 127);
accum += __arm_vcx2qa (0, accum, n, 127);
accum += __arm_vcx3q_u8 (0, n_int, m, 14);
accum += __arm_vcx3q (0, n_int, m, 15);
accum += __arm_vcx3qa (0, accum, n, m, 15);
/* { dg-error {argument 1 to function '__builtin_arm_vcx1qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 11 } */
/* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx1qav16qi'} "" { target *-*-* } 11 } */
/* { dg-error {argument 1 to function '__builtin_arm_vcx2qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 12 } */
/* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx2qv16qi'} "" { target *-*-* } 12 } */
/* { dg-error {argument 1 to function '__builtin_arm_vcx2qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 13 } */
/* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx2qv16qi'} "" { target *-*-* } 13 } */
/* { dg-error {argument 1 to function '__builtin_arm_vcx2qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 14 } */
/* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx2qav16qi'} "" { target *-*-* } 14 } */
/* { dg-error {argument 1 to function '__builtin_arm_vcx3qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 15 } */
/* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx3qv16qi'} "" { target *-*-* } 15 } */
/* { dg-error {argument 1 to function '__builtin_arm_vcx3qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 16 } */
/* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx3qv16qi'} "" { target *-*-* } 16 } */
/* { dg-error {argument 1 to function '__builtin_arm_vcx3qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 17 } */
/* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx3qav16qi'} "" { target *-*-* } 17 } */
return accum;
}
int8x16_t test2 (int8x16_t m, int8x16_t n)
{
uint32_t n_int = 0, m_int = 0;
int8x16_t accum = (int8x16_t)(uint64x2_t) { 0, 0 };
accum += __arm_vcx2qa (0, accum, n_int, 127);
accum += __arm_vcx3q_u8 (0, n, m_int, 14);
accum += __arm_vcx3q (0, n, m_int, 15);
accum += __arm_vcx3qa (0, accum, n_int, m, 15);
accum += __arm_vcx3qa (0, accum, n_int, m, 15);
accum += __arm_vcx3qa (0, accum, n, m_int, 15);
accum += __arm_vcx3qa (0, accum, n, m_int, 15);
/* { dg-error {argument 2 to function '__builtin_arm_vcx2qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 40 } */
/* { dg-error {incompatible type for argument 3 of '__builtin_arm_vcx2qav16qi'} "" { target *-*-* } 40 } */
/* { dg-error {argument 2 to function '__builtin_arm_vcx3qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 41 } */
/* { dg-error {incompatible type for argument 3 of '__builtin_arm_vcx3qv16qi'} "" { target *-*-* } 41 } */
/* { dg-error {argument 2 to function '__builtin_arm_vcx3qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 42 } */
/* { dg-error {incompatible type for argument 3 of '__builtin_arm_vcx3qv16qi'} "" { target *-*-* } 42 } */
/* { dg-error {argument 2 to function '__builtin_arm_vcx3qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 43 } */
/* { dg-error {incompatible type for argument 3 of '__builtin_arm_vcx3qav16qi'} "" { target *-*-* } 43 } */
/* { dg-error {argument 2 to function '__builtin_arm_vcx3qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 44 } */
/* { dg-error {incompatible type for argument 3 of '__builtin_arm_vcx3qav16qi'} "" { target *-*-* } 44 } */
/* { dg-error {argument 3 to function '__builtin_arm_vcx3qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 45 } */
/* { dg-error {incompatible type for argument 4 of '__builtin_arm_vcx3qav16qi'} "" { target *-*-* } 45 } */
/* { dg-error {argument 3 to function '__builtin_arm_vcx3qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 46 } */
/* { dg-error {incompatible type for argument 4 of '__builtin_arm_vcx3qav16qi'} "" { target *-*-* } 46 } */
return accum;
}
/* Testing that undeclared variables work as expected.
(This to verify we fixed a problem hit during development). */
int8x16_t test3 (int8x16_t m, int8x16_t n)
{
int8x16_t accum = (int8x16_t)(uint64x2_t) { 0, 0 };
accum += __arm_vcx1qa (0, accum_int, 4095);
accum += __arm_vcx2q (0, n_int, 126);
accum += __arm_vcx2q_u8 (0, n_int, 127);
accum += __arm_vcx2qa (0, accum, n_int, 127);
accum += __arm_vcx3q_u8 (0, n_int, m, 14);
accum += __arm_vcx3q_u8 (0, n, m_int, 14);
accum += __arm_vcx3q (0, n_int, m, 15);
accum += __arm_vcx3q (0, n, m_int, 15);
accum += __arm_vcx3qa (0, accum, n_int, m, 15);
accum += __arm_vcx3qa (0, accum, n_int, m_int, 15);
/* { dg-error {'accum_int' undeclared \(first use in this function\)} "" { target *-*-* } 70 } */
/* { dg-error {'n_int' undeclared \(first use in this function\)} "" { target *-*-* } 71 } */
/* { dg-error {'m_int' undeclared \(first use in this function\)} "" { target *-*-* } 75 } */
return accum;
}

View File

@ -0,0 +1,557 @@
/* { dg-do compile } */
/* { dg-skip-if "Require optimisation to compile DCE tests" { *-*-* } { "-O0" "-mfloat-abi=softfp" } { "" } } */
/* { dg-require-effective-target arm_v8_1m_main_cde_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_main_cde_mve_fp } */
/* We use -ffast-math so that the addition of 0.0 to a value is assumed to not
change the value. This means the tests for float types can use the same
trick of adding to a value initialised to zero to check whether the RTL
patterns correctly mark that the incoming value is not used. */
/* { dg-additional-options "-ffast-math" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "cde-mve-tests.c"
/* NOTE:
We avoid matching the functions returning a __builtin_neon_ti value since
there are variations between processors that make matching the whole
function difficult.
Since moving a TImode value into an MVE 'Q' register takes a few
temporaries, this leaves many instructions which can end up being
scheduled in different ways. Matching the ways this ends up getting
scheduled and restructured is awkward, and the extra tests for this one
data type don't seem to be worth the confusing testcases. */
/*
** test_cde_vcx1q_u8float16x8_tintint:
** vcx1 p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1q_u8float32x4_tintint:
** vcx1 p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1q_u8uint8x16_tintint:
** vcx1 p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1q_u8uint16x8_tintint:
** vcx1 p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1q_u8uint32x4_tintint:
** vcx1 p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1q_u8uint64x2_tintint:
** vcx1 p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1q_u8int8x16_tintint:
** vcx1 p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1q_u8int16x8_tintint:
** vcx1 p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1q_u8int32x4_tintint:
** vcx1 p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1q_u8int64x2_tintint:
** vcx1 p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1qafloat16x8_tintint:
** vldr\.64 d0, \.L([0-9]*)
** vldr\.64 d1, \.L\1\+8
** vcx1a p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1qafloat32x4_tintint:
** vldr\.64 d0, \.L([0-9]*)
** vldr\.64 d1, \.L\1\+8
** vcx1a p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1qauint8x16_tintint:
** vldr\.64 d0, \.L([0-9]*)
** vldr\.64 d1, \.L\1\+8
** vcx1a p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1qauint16x8_tintint:
** vldr\.64 d0, \.L([0-9]*)
** vldr\.64 d1, \.L\1\+8
** vcx1a p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1qauint32x4_tintint:
** vldr\.64 d0, \.L([0-9]*)
** vldr\.64 d1, \.L\1\+8
** vcx1a p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1qauint64x2_tintint:
** vldr\.64 d0, \.L([0-9]*)
** vldr\.64 d1, \.L\1\+8
** vcx1a p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1qaint8x16_tintint:
** vldr\.64 d0, \.L([0-9]*)
** vldr\.64 d1, \.L\1\+8
** vcx1a p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1qaint16x8_tintint:
** vldr\.64 d0, \.L([0-9]*)
** vldr\.64 d1, \.L\1\+8
** vcx1a p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1qaint32x4_tintint:
** vldr\.64 d0, \.L([0-9]*)
** vldr\.64 d1, \.L\1\+8
** vcx1a p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx1qaint64x2_tintint:
** vldr\.64 d0, \.L([0-9]*)
** vldr\.64 d1, \.L\1\+8
** vcx1a p0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2q_u8float16x8_tuint16x8_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2q_u8float16x8_tfloat32x4_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2q_u8float32x4_tuint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2q_u8int64x2_tuint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2q_u8int8x16_tuint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2q_u8uint16x8_tuint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2q_u8uint8x16_tint64x2_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2q_u8uint8x16_tint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2q_u8uint8x16_tuint16x8_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2q_u8uint8x16_tuint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2qfloat16x8_tuint16x8_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2qfloat16x8_tfloat32x4_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2qfloat32x4_tuint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2qint64x2_tuint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2qint8x16_tuint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2quint16x8_tuint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2quint8x16_tint64x2_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2quint8x16_tint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2quint8x16_tuint16x8_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2quint8x16_tuint8x16_tint:
** vcx2 p0, q0, q0, #33
** bx lr
*/
/*
** test_cde_vcx2qafloat16x8_tuint16x8_tint:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx2a p0, (q[0-7]), q0, #33
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx2qafloat16x8_tfloat32x4_tint:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx2a p0, (q[0-7]), q0, #33
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx2qafloat32x4_tuint8x16_tint:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx2a p0, (q[0-7]), q0, #33
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx2qaint64x2_tuint8x16_tint:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx2a p0, (q[0-7]), q0, #33
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx2qaint8x16_tuint8x16_tint:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx2a p0, (q[0-7]), q0, #33
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx2qauint16x8_tuint8x16_tint:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx2a p0, (q[0-7]), q0, #33
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx2qauint8x16_tint64x2_tint:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx2a p0, (q[0-7]), q0, #33
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx2qauint8x16_tint8x16_tint:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx2a p0, (q[0-7]), q0, #33
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx2qauint8x16_tuint16x8_tint:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx2a p0, (q[0-7]), q0, #33
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx2qauint8x16_tuint8x16_tint:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx2a p0, (q[0-7]), q0, #33
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3q_u8uint8x16_tuint8x16_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8uint16x8_tuint8x16_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8uint8x16_tuint16x8_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8uint8x16_tuint8x16_tuint16x8_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8float16x8_tfloat16x8_tfloat16x8_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8float32x4_tuint64x2_tfloat16x8_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8int8x16_tuint8x16_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8uint8x16_tint8x16_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8uint8x16_tuint8x16_tint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8int64x2_tuint8x16_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8uint8x16_tint64x2_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8uint8x16_tuint8x16_tint64x2_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3q_u8uint8x16_tint64x2_tint64x2_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3quint8x16_tuint8x16_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3qfloat16x8_tfloat16x8_tfloat16x8_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3qfloat32x4_tuint64x2_tfloat16x8_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3quint16x8_tuint8x16_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3quint8x16_tuint16x8_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3quint8x16_tuint8x16_tuint16x8_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3qint8x16_tuint8x16_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3quint8x16_tint8x16_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3quint8x16_tuint8x16_tint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3qint64x2_tuint8x16_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3quint8x16_tint64x2_tuint8x16_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3quint8x16_tuint8x16_tint64x2_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3quint8x16_tint64x2_tint64x2_t:
** vcx3 p0, q0, q0, q1, #12
** bx lr
*/
/*
** test_cde_vcx3qauint8x16_tuint8x16_tuint8x16_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qafloat16x8_tfloat16x8_tfloat16x8_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qafloat32x4_tuint64x2_tfloat16x8_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qauint16x8_tuint8x16_tuint8x16_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qauint8x16_tuint16x8_tuint8x16_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qauint8x16_tuint8x16_tuint16x8_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qaint8x16_tuint8x16_tuint8x16_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qauint8x16_tint8x16_tuint8x16_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qauint8x16_tuint8x16_tint8x16_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qaint64x2_tuint8x16_tuint8x16_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qauint8x16_tint64x2_tuint8x16_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qauint8x16_tuint8x16_tint64x2_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/
/*
** test_cde_vcx3qauint8x16_tint64x2_tint64x2_t:
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L([0-9]*)
** vldr\.64 d(?:[01][0-4]|[0-9]), \.L\1\+8
** vcx3a p0, (q[0-7]), q0, q1, #12
** vmov q0, \2
** bx lr
*/

View File

@ -0,0 +1,722 @@
#include "arm_cde.h"
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_main_cde_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_main_cde_mve_fp } */
/* { dg-final { check-function-bodies "**" "" } } */
/* Test that the assembly is produced as expected.
Test that the same thing happens for each valid type.
(ensure we check *every* valid type, though we're not bothering with every
type combination, just checking "all same type" and "different types",
also want to check every valid type at least once) */
#define TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, accum_type, n_type, m_type, arguments) \
accum_type test_cde_##name##accum_type##n_type##m_type ( \
__attribute__ ((unused)) n_type n, \
__attribute__ ((unused)) m_type m) \
{ \
accum_type accum = (accum_type)(uint32x4_t){0,0,0,0}; \
accum += (accum_type) __arm_##name arguments; \
return accum; \
}
/* Use every valid type for the output -- demonstrate can use any 128 bit value
(which is a requirement for these intrinsics). */
#define TEST_CDE_MVE_INTRINSIC_1(name, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, __builtin_neon_ti, int, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float16x8_t, int, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float32x4_t, int, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint16x8_t, int, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint32x4_t, int, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint64x2_t, int, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int8x16_t, int, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int16x8_t, int, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int32x4_t, int, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int64x2_t, int, int, arguments) \
#define TEST_CDE_MVE_INTRINSIC_2(name, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, __builtin_neon_ti, uint8x16_t, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, __builtin_neon_ti, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float16x8_t, uint16x8_t, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float16x8_t, float32x4_t, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float32x4_t, uint8x16_t, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int64x2_t, uint8x16_t, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int8x16_t, uint8x16_t, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint16x8_t, uint8x16_t, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int64x2_t, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int8x16_t, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint16x8_t, int, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, int, arguments)
#define TEST_CDE_MVE_INTRINSIC_3(name, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, __builtin_neon_ti, uint8x16_t, uint8x16_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, __builtin_neon_ti, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, uint8x16_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float16x8_t, float16x8_t, float16x8_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float32x4_t, uint64x2_t, float16x8_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint16x8_t, uint8x16_t, uint8x16_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint16x8_t, uint8x16_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, uint16x8_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int8x16_t, uint8x16_t, uint8x16_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int8x16_t, uint8x16_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, int8x16_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int64x2_t, uint8x16_t, uint8x16_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int64x2_t, uint8x16_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, int64x2_t, arguments) \
TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int64x2_t, int64x2_t, arguments)
TEST_CDE_MVE_INTRINSIC_1(vcx1q_u8, (0, 33))
TEST_CDE_MVE_INTRINSIC_1(vcx1qa, (0, accum, 33))
TEST_CDE_MVE_INTRINSIC_2(vcx2q_u8, (0, n, 33))
TEST_CDE_MVE_INTRINSIC_2(vcx2q, (0, n, 33))
TEST_CDE_MVE_INTRINSIC_2(vcx2qa, (0, accum, n, 33))
TEST_CDE_MVE_INTRINSIC_3(vcx3q_u8, (0, n, m, 12))
TEST_CDE_MVE_INTRINSIC_3(vcx3q, (0, n, m, 12))
TEST_CDE_MVE_INTRINSIC_3(vcx3qa, (0, accum, n, m, 12))
/* This testcase checks that in all compilations this C code produces the
expected CDE instructions from the above intrinsics.
Here we check that there are the expected number of `vcx*` occurences, and
that each function has the expected form in it.
Another testcase (cde-mve-full-assembly.c) checks that when using
`-mfloat-abi=hard` and when compiled with an FPU the above C code produces
code that demonstrates the compiler knows that the intrinsics are constant
and pure, and that demonstrates the compiler generates sane code from them.
That testcase needs these special arguments so it can ignore things like
accounting for the soft float ABI or leftovers from temporaries that are
later removed when generating code for a target with Floating Point
registers but without an FPU. */
/* { dg-final { scan-assembler-times "\tvcx1\t" 11 } } */
/* { dg-final { scan-assembler-times "\tvcx1a\t" 11 } } */
/* { dg-final { scan-assembler-times "\tvcx2\t" 24 } } */
/* { dg-final { scan-assembler-times "\tvcx2a\t" 12 } } */
/* { dg-final { scan-assembler-times "\tvcx3\t" 30 } } */
/* { dg-final { scan-assembler-times "\tvcx3a\t" 15 } } */
/*
** test_cde_vcx1q_u8__builtin_neon_tiintint:
** ...
** vcx1 p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1q_u8float16x8_tintint:
** ...
** vcx1 p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1q_u8float32x4_tintint:
** ...
** vcx1 p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1q_u8uint8x16_tintint:
** ...
** vcx1 p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1q_u8uint16x8_tintint:
** ...
** vcx1 p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1q_u8uint32x4_tintint:
** ...
** vcx1 p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1q_u8uint64x2_tintint:
** ...
** vcx1 p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1q_u8int8x16_tintint:
** ...
** vcx1 p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1q_u8int16x8_tintint:
** ...
** vcx1 p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1q_u8int32x4_tintint:
** ...
** vcx1 p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1q_u8int64x2_tintint:
** ...
** vcx1 p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1qa__builtin_neon_tiintint:
** ...
** vcx1a p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1qafloat16x8_tintint:
** ...
** vcx1a p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1qafloat32x4_tintint:
** ...
** vcx1a p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1qauint8x16_tintint:
** ...
** vcx1a p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1qauint16x8_tintint:
** ...
** vcx1a p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1qauint32x4_tintint:
** ...
** vcx1a p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1qauint64x2_tintint:
** ...
** vcx1a p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1qaint8x16_tintint:
** ...
** vcx1a p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1qaint16x8_tintint:
** ...
** vcx1a p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1qaint32x4_tintint:
** ...
** vcx1a p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx1qaint64x2_tintint:
** ...
** vcx1a p0, q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8__builtin_neon_tiuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8uint8x16_t__builtin_neon_tiint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8float16x8_tuint16x8_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8float16x8_tfloat32x4_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8float32x4_tuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8int64x2_tuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8int8x16_tuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8uint16x8_tuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8uint8x16_tint64x2_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8uint8x16_tint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8uint8x16_tuint16x8_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q_u8uint8x16_tuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2q__builtin_neon_tiuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2quint8x16_t__builtin_neon_tiint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qfloat16x8_tuint16x8_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qfloat16x8_tfloat32x4_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qfloat32x4_tuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qint64x2_tuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qint8x16_tuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2quint16x8_tuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2quint8x16_tint64x2_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2quint8x16_tint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2quint8x16_tuint16x8_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2quint8x16_tuint8x16_tint:
** ...
** vcx2 p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qa__builtin_neon_tiuint8x16_tint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qauint8x16_t__builtin_neon_tiint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qafloat16x8_tuint16x8_tint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qafloat16x8_tfloat32x4_tint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qafloat32x4_tuint8x16_tint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qaint64x2_tuint8x16_tint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qaint8x16_tuint8x16_tint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qauint16x8_tuint8x16_tint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qauint8x16_tint64x2_tint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qauint8x16_tint8x16_tint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qauint8x16_tuint16x8_tint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx2qauint8x16_tuint8x16_tint:
** ...
** vcx2a p0, q[0-7], q[0-7], #33
** ...
*/
/*
** test_cde_vcx3q_u8__builtin_neon_tiuint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8uint8x16_tuint8x16_t__builtin_neon_ti:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8uint8x16_tuint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8uint16x8_tuint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8uint8x16_tuint16x8_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8uint8x16_tuint8x16_tuint16x8_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8float16x8_tfloat16x8_tfloat16x8_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8float32x4_tuint64x2_tfloat16x8_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8int8x16_tuint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8uint8x16_tint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8uint8x16_tuint8x16_tint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8int64x2_tuint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8uint8x16_tint64x2_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8uint8x16_tuint8x16_tint64x2_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q_u8uint8x16_tint64x2_tint64x2_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3q__builtin_neon_tiuint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3quint8x16_tuint8x16_t__builtin_neon_ti:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3quint8x16_tuint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qfloat16x8_tfloat16x8_tfloat16x8_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qfloat32x4_tuint64x2_tfloat16x8_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3quint16x8_tuint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3quint8x16_tuint16x8_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3quint8x16_tuint8x16_tuint16x8_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qint8x16_tuint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3quint8x16_tint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3quint8x16_tuint8x16_tint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qint64x2_tuint8x16_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3quint8x16_tint64x2_tuint8x16_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3quint8x16_tuint8x16_tint64x2_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3quint8x16_tint64x2_tint64x2_t:
** ...
** vcx3 p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qa__builtin_neon_tiuint8x16_tuint8x16_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qauint8x16_tuint8x16_t__builtin_neon_ti:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qauint8x16_tuint8x16_tuint8x16_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qafloat16x8_tfloat16x8_tfloat16x8_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qafloat32x4_tuint64x2_tfloat16x8_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qauint16x8_tuint8x16_tuint8x16_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qauint8x16_tuint16x8_tuint8x16_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qauint8x16_tuint8x16_tuint16x8_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qaint8x16_tuint8x16_tuint8x16_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qauint8x16_tint8x16_tuint8x16_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qauint8x16_tuint8x16_tint8x16_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qaint64x2_tuint8x16_tuint8x16_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qauint8x16_tint64x2_tuint8x16_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qauint8x16_tuint8x16_tint64x2_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/
/*
** test_cde_vcx3qauint8x16_tint64x2_tint64x2_t:
** ...
** vcx3a p0, q[0-7], q[0-7], q[0-7], #12
** ...
*/

View File

@ -5120,11 +5120,14 @@ foreach { armfunc armflag armdef } {
"-march=armv8-m.main+cdecp0+cdecp6 -mthumb"
"defined (__ARM_FEATURE_CDE)"
arm_v8m_main_cde_fp
"-march=armv8-m.main+fp+cdecp0+cdecp6 -mthumb"
"-march=armv8-m.main+fp+cdecp0+cdecp6 -mthumb -mfpu=auto"
"defined (__ARM_FEATURE_CDE) && defined (__ARM_FP)"
arm_v8_1m_main_cde_mve
"-march=armv8.1-m.main+mve+cdecp0+cdecp6 -mthumb"
"-march=armv8.1-m.main+mve+cdecp0+cdecp6 -mthumb -mfpu=auto"
"defined (__ARM_FEATURE_CDE) && defined (__ARM_FEATURE_MVE)"
arm_v8_1m_main_cde_mve_fp
"-march=armv8.1-m.main+mve.fp+cdecp0+cdecp6 -mthumb -mfpu=auto"
"defined (__ARM_FEATURE_CDE) || __ARM_FEATURE_MVE == 3"
} {
eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
proc check_effective_target_FUNC_ok_nocache { } {