[Arm] Implement CDE intrinsics for MVE registers.

Implement CDE intrinsics on MVE registers. Other than the basics required for adding intrinsics this patch consists of three changes. ** We separate out the MVE types and casts from the arm_mve.h header. This is so that the types can be used in arm_cde.h without the need to include the entire arm_mve.h header. The only type that arm_cde.h needs is `uint8x16_t`, so this separation could be avoided by using a `typedef` in this file. Since the introduced intrinsics are all defined to act on the full range of MVE types, declaring all such types seems intuitive since it will provide their declaration to the user too. This arm_mve_types.h header not only includes the MVE types, but also the conversion intrinsics between them. Some of the conversion intrinsics are needed for arm_cde.h, but most are not. We include all conversion intrinsics to keep the definition of such conversion functions all in one place, on the understanding that extra conversion functions being defined when including `arm_cde.h` is not a problem. ** We define the TARGET_RESOLVE_OVERLOADED_BUILTIN hook for the Arm backend. This is needed to implement the polymorphism for the required intrinsics. The intrinsics have no specialised version, and the resulting assembly instruction for all different types should be exactly the same. Due to this we have implemented these intrinsics via one builtin on one type. All other calls to the intrinsic with different types are implicitly cast to the one type that is defined, and hence are all expanded to the same RTL pattern that is only defined for one machine mode. ** We seperate the initialisation of the CDE intrinsics from others. This allows us to ensure that the CDE intrinsics acting on MVE registers are only created when both CDE and MVE are available. Only initialising these builtins when both features are available is especially important since they require a type that is only initialised when the target supports hard float. Hence trying to initialise these builtins on a soft float target would cause an ICE. Testing done: Full bootstrap and regtest on arm-none-linux-gnueabihf Regression test on arm-none-eabi Ok for trunk? gcc/ChangeLog: 2020-03-10 Matthew Malcomson <matthew.malcomson@arm.com> * config.gcc (arm_mve_types.h): New extra_header for arm. * config/arm/arm-builtins.c (arm_resolve_overloaded_builtin): New. (arm_init_cde_builtins): New. (arm_init_acle_builtins): Remove initialisation of CDE builtins. (arm_init_builtins): Call arm_init_cde_builtins when target supports CDE. * config/arm/arm-c.c (arm_resolve_overloaded_builtin): New declaration. (arm_register_target_pragmas): Initialise resolve_overloaded_builtin hook to the implementation for the arm backend. * config/arm/arm.h (ARM_MVE_CDE_CONST_1): New. (ARM_MVE_CDE_CONST_2): New. (ARM_MVE_CDE_CONST_3): New. * config/arm/arm_cde.h (__arm_vcx1q_u8): New. (__arm_vcx1qa): New. (__arm_vcx2q): New. (__arm_vcx2q_u8): New. (__arm_vcx2qa): New. (__arm_vcx3q): New. (__arm_vcx3q_u8): New. (__arm_vcx3qa): New. * config/arm/arm_cde_builtins.def (vcx1q, vcx1qa, vcx2q, vcx2qa, vcx3q, vcx3qa): New builtins defined. * config/arm/arm_mve.h: Move typedefs and conversion intrinsics to arm_mve_types.h header. * config/arm/arm_mve_types.h: New file. * config/arm/mve.md (arm_vcx1qv16qi, arm_vcx1qav16qi, arm_vcx2qv16qi, arm_vcx2qav16qi, arm_vcx3qv16qi, arm_vcx3qav16qi): New patterns. * config/arm/predicates.md (const_int_mve_cde1_operand, const_int_mve_cde2_operand, const_int_mve_cde3_operand): New. gcc/testsuite/ChangeLog: 2020-03-23 Matthew Malcomson <matthew.malcomson@arm.com> Dennis Zhang <dennis.zhang@arm.com> * gcc.target/arm/acle/cde-mve-error-1.c: New test. * gcc.target/arm/acle/cde-mve-error-2.c: New test. * gcc.target/arm/acle/cde-mve-error-3.c: New test. * gcc.target/arm/acle/cde-mve-full-assembly.c: New test. * gcc.target/arm/acle/cde-mve-tests.c: New test. * lib/target-supports.exp (arm_v8_1m_main_cde_mve_fp): New check effective. (arm_v8_1m_main_cde_mve, arm_v8m_main_cde_fp): Use -mfpu=auto so we only check configurations that make sense.
2025-03-21 17:30:54 +08:00 · 2020-04-08 16:06:47 +01:00 · 2020-04-08 16:06:47 +01:00 · 78bf916376
commit 78bf916376
parent a5f3c89e1b
16 changed files with 3070 additions and 1302 deletions
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@ -346,7 +346,7 @@ arc*-*-*)
 arm*-*-*)
 	cpu_type=arm
 	extra_objs="arm-builtins.o aarch-common.o"
-	extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h arm_cmse.h arm_bf16.h arm_mve.h arm_cde.h"
+	extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h arm_cmse.h arm_bf16.h arm_mve_types.h arm_mve.h arm_cde.h"
 	target_type_format_char='%'
 	c_target_objs="arm-c.o"
 	cxx_target_objs="arm-c.o"
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@ -1833,10 +1833,22 @@ arm_init_acle_builtins (void)
      arm_builtin_datum *d = &acle_builtin_data[i];
      arm_init_builtin (fcode, d, "__builtin_arm");
    }
+}

-  fcode = ARM_BUILTIN_CDE_PATTERN_START;
+static void
+arm_init_cde_builtins (void)
+{
+  unsigned int i, fcode = ARM_BUILTIN_CDE_PATTERN_START;
  for (i = 0; i < ARRAY_SIZE (cde_builtin_data); i++, fcode++)
    {
+      /* Only define CDE floating point builtins if the target has floating
+	 point registers.  NOTE: without HARD_FLOAT we don't have MVE, so we
+	 can break out of this loop directly here.  */
+      if (!TARGET_MAYBE_HARD_FLOAT && fcode >= ARM_BUILTIN_vcx1si)
+	break;
+      /* Only define CDE/MVE builtins if MVE is available.  */
+      if (!TARGET_HAVE_MVE && fcode >= ARM_BUILTIN_vcx1qv16qi)
+	break;
      arm_builtin_cde_datum *cde = &cde_builtin_data[i];
      arm_builtin_datum *d = &cde->base;
      arm_init_builtin (fcode, d, "__builtin_arm");
@ -2628,6 +2640,9 @@ arm_init_builtins (void)
      arm_init_crypto_builtins ();
    }

+  if (TARGET_CDE)
+    arm_init_cde_builtins ();
+
  arm_init_acle_builtins ();

  if (TARGET_MAYBE_HARD_FLOAT)
@ -4178,4 +4193,90 @@ arm_check_builtin_call (location_t , vec<location_t> , tree fndecl,
  return true;
 }

+/* Implement TARGET_RESOLVE_OVERLOADED_BUILTIN.  This is currently only
+   used for the MVE related builtins for the CDE extension.
+   Here we ensure the type of arguments is such that the size is correct, and
+   then return a tree that describes the same function call but with the
+   relevant types cast as necessary.  */
+tree
+arm_resolve_overloaded_builtin (location_t loc, tree fndecl, void *arglist)
+{
+  if (DECL_MD_FUNCTION_CODE (fndecl) <= ARM_BUILTIN_vcx1qv16qi
+      || DECL_MD_FUNCTION_CODE (fndecl) >= ARM_BUILTIN_MVE_BASE)
+    return NULL_TREE;
+
+  vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (arglist);
+  unsigned param_num = params ? params->length() : 0;
+  unsigned num_args = list_length (TYPE_ARG_TYPES (TREE_TYPE (fndecl))) - 1;
+  /* Ensure this function has the correct number of arguments.
+     This won't happen when using the intrinsics defined by the ACLE, since
+     they're exposed to the user via a wrapper in the arm_cde.h header that has
+     the correct number of arguments ... hence the compiler would already catch
+     an incorrect number of arguments there.
+
+     It is still possible to get here if the user tries to call the __bulitin_*
+     functions directly.  We could print some error message in this function,
+     but instead we leave it to the rest of the code to catch this problem in
+     the same way that other __builtin_* functions catch it.
+
+     This does mean an odd error message, but it's consistent with the rest of
+     the builtins.  */
+  if (param_num != num_args)
+    return NULL_TREE;
+
+  tree to_return = NULL_TREE;
+  /* Take the functions return type since that's the same type as the arguments
+     this function needs (the types of the builtin function all come from the
+     machine mode of the RTL pattern, and they're all the same and calculated
+     in the same way).  */
+  tree pattern_type = TREE_TYPE (TREE_TYPE (fndecl));
+
+  unsigned i;
+  for (i = 1; i < (param_num - 1); i++)
+    {
+      tree this_param = (*params)[i];
+      if (TREE_CODE (this_param) == ERROR_MARK)
+	return NULL_TREE;
+      tree param_type = TREE_TYPE (this_param);
+
+      /* Return value is cast to type that second argument originally was.
+	 All non-constant arguments are cast to the return type calculated from
+	 the RTL pattern.
+
+	 Set the return type to an unqualified version of the type of the first
+	 parameter.  The first parameter since that is how the intrinsics are
+	 defined -- to always return the same type as the first polymorphic
+	 argument.  Unqualified version of the type since we don't want passing
+	 a constant parameter to mean that the return value of the builtin is
+	 also constant.  */
+      if (i == 1)
+	to_return = build_qualified_type (param_type, 0 MEM_STAT_INFO);
+
+      /* The only requirement of these intrinsics on the type of the variable
+	 is that it's 128 bits wide.  All other types are valid and we simply
+	 VIEW_CONVERT_EXPR them to the type of the underlying builtin.  */
+      tree type_size = TYPE_SIZE (param_type);
+      if (! tree_fits_shwi_p (type_size)
+	  || tree_to_shwi (type_size) != 128)
+	{
+	  error_at (loc,
+		    "argument %u to function %qE is of type %qT which is not "
+		    "known to be 128 bits wide",
+		    i, fndecl, param_type);
+	  return NULL_TREE;
+	}
+
+      /* Only convert the argument if we actually need to.  */
+      if (! check_base_type (pattern_type, param_type))
+	(*params)[i] = build1 (VIEW_CONVERT_EXPR, pattern_type, this_param);
+    }
+  tree call_expr = build_call_expr_loc_array (loc, fndecl, param_num,
+					      params->address());
+
+  gcc_assert (to_return != NULL_TREE);
+  if (! check_base_type (to_return, pattern_type))
+    return build1 (VIEW_CONVERT_EXPR, to_return, call_expr);
+  return call_expr;
+}
+
 #include "gt-arm-builtins.h"
--- a/gcc/config/arm/arm-c.c
+++ b/gcc/config/arm/arm-c.c
@ -28,6 +28,8 @@
 #include "c-family/c-pragma.h"
 #include "stringpool.h"

+tree arm_resolve_overloaded_builtin (location_t, tree, void*);
+
 /* Output C specific EABI object attributes.  These cannot be done in
   arm.c because they require information from the C frontend.  */

@ -360,6 +362,7 @@ arm_register_target_pragmas (void)
 {
  /* Update pragma hook to allow parsing #pragma GCC target.  */
  targetm.target_option.pragma_parse = arm_pragma_target_parse;
+  targetm.resolve_overloaded_builtin = arm_resolve_overloaded_builtin;

 #ifdef REGISTER_SUBTARGET_PRAGMAS
  REGISTER_SUBTARGET_PRAGMAS ();
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@ -582,6 +582,9 @@ extern const int arm_arch_cde_coproc_bits[];
 #define ARM_VCDE_CONST_1	((1 << 11) - 1)
 #define ARM_VCDE_CONST_2	((1 << 6 ) - 1)
 #define ARM_VCDE_CONST_3	((1 << 3 ) - 1)
+#define ARM_MVE_CDE_CONST_1	((1 << 12) - 1)
+#define ARM_MVE_CDE_CONST_2	((1 << 7 ) - 1)
+#define ARM_MVE_CDE_CONST_3	((1 << 4 ) - 1)

 #ifndef TARGET_DEFAULT
 #define TARGET_DEFAULT  (MASK_APCS_FRAME)
--- a/gcc/config/arm/arm_cde.h
+++ b/gcc/config/arm/arm_cde.h
@ -140,6 +140,28 @@ extern "C" {
 #endif /* __ARM_FP || __ARM_FEATURE_MVE.  */
 #endif /* __ARM_FEATURE_CDE.  */

+#if __ARM_FEATURE_MVE
+#include "arm_mve_types.h"
+
+#define __arm_vcx1q_u8(coproc, imm) \
+	(uint8x16_t)__builtin_arm_vcx1qv16qi(coproc, imm)
+#define __arm_vcx1qa(coproc, acc, imm) \
+	__builtin_arm_vcx1qav16qi(coproc, acc, imm)
+#define __arm_vcx2q(coproc, n, imm) \
+	__builtin_arm_vcx2qv16qi(coproc, n, imm)
+#define __arm_vcx2q_u8(coproc, n, imm) \
+	(uint8x16_t)__builtin_arm_vcx2qv16qi(coproc, n, imm)
+#define __arm_vcx2qa(coproc, acc, n, imm) \
+	__builtin_arm_vcx2qav16qi(coproc, acc, n, imm)
+#define __arm_vcx3q(coproc, n, m, imm) \
+	__builtin_arm_vcx3qv16qi(coproc, n, m, imm)
+#define __arm_vcx3q_u8(coproc, n, m, imm) \
+	(uint8x16_t)__builtin_arm_vcx3qv16qi(coproc, n, m, imm)
+#define __arm_vcx3qa(coproc, acc, n, m, imm) \
+	__builtin_arm_vcx3qav16qi(coproc, acc, n, m, imm)
+
+#endif
+
 #ifdef __cplusplus
 }
 #endif
--- a/gcc/config/arm/arm_cde_builtins.def
+++ b/gcc/config/arm/arm_cde_builtins.def
@ -37,4 +37,15 @@ CDE_VAR2 (CX_BINARY, vcx2a, si, di, ARM_VCDE_CONST_2, ECF_CONST)
 CDE_VAR2 (CX_BINARY, vcx3, si, di, ARM_VCDE_CONST_3, ECF_CONST)
 CDE_VAR2 (CX_TERNARY, vcx3a, si, di, ARM_VCDE_CONST_3, ECF_CONST)

+/* NOTE: The MVE intrinsics must be defined at the end of this file, and with
+	 vcx1q first.
+         These restrictions are relied on to determine which intrinsics need
+	 overload resolution in `arm_resolve_overloaded_builtin`.  */
+VAR1 (CX_IMM, vcx1q, v16qi, ARM_MVE_CDE_CONST_1, ECF_CONST)
+VAR1 (CX_UNARY, vcx1qa, v16qi, ARM_MVE_CDE_CONST_1, ECF_CONST)
+VAR1 (CX_UNARY, vcx2q, v16qi, ARM_MVE_CDE_CONST_2, ECF_CONST)
+VAR1 (CX_BINARY, vcx2qa, v16qi, ARM_MVE_CDE_CONST_2, ECF_CONST)
+VAR1 (CX_BINARY, vcx3q, v16qi, ARM_MVE_CDE_CONST_3, ECF_CONST)
+VAR1 (CX_TERNARY, vcx3qa, v16qi, ARM_MVE_CDE_CONST_3, ECF_CONST)
+
 #undef CDE_VAR2
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
--- a/gcc/config/arm/arm_mve_types.h
+++ b/gcc/config/arm/arm_mve_types.h
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@ -11301,3 +11301,74 @@
 "vpst\;vshlct\t%q0, %1, %4"
 [(set_attr "type" "mve_move")
  (set_attr "length" "8")])
+
+;; CDE instructions on MVE registers.
+
+(define_insn "arm_vcx1qv16qi"
+  [(set (match_operand:V16QI 0 "register_operand" "=t")
+	(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+			   (match_operand:SI 2 "const_int_mve_cde1_operand" "i")]
+	 UNSPEC_VCDE))]
+  "TARGET_CDE && TARGET_HAVE_MVE"
+  "vcx1\\tp%c1, %q0, #%c2"
+  [(set_attr "type" "coproc")]
+)
+
+(define_insn "arm_vcx1qav16qi"
+  [(set (match_operand:V16QI 0 "register_operand" "=t")
+	(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+			    (match_operand:V16QI 2 "register_operand" "0")
+			    (match_operand:SI 3 "const_int_mve_cde1_operand" "i")]
+	 UNSPEC_VCDEA))]
+  "TARGET_CDE && TARGET_HAVE_MVE"
+  "vcx1a\\tp%c1, %q0, #%c3"
+  [(set_attr "type" "coproc")]
+)
+
+(define_insn "arm_vcx2qv16qi"
+  [(set (match_operand:V16QI 0 "register_operand" "=t")
+	(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+			  (match_operand:V16QI 2 "register_operand" "t")
+			  (match_operand:SI 3 "const_int_mve_cde2_operand" "i")]
+	 UNSPEC_VCDE))]
+  "TARGET_CDE && TARGET_HAVE_MVE"
+  "vcx2\\tp%c1, %q0, %q2, #%c3"
+  [(set_attr "type" "coproc")]
+)
+
+(define_insn "arm_vcx2qav16qi"
+  [(set (match_operand:V16QI 0 "register_operand" "=t")
+	(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+			  (match_operand:V16QI 2 "register_operand" "0")
+			  (match_operand:V16QI 3 "register_operand" "t")
+			  (match_operand:SI 4 "const_int_mve_cde2_operand" "i")]
+	 UNSPEC_VCDEA))]
+  "TARGET_CDE && TARGET_HAVE_MVE"
+  "vcx2a\\tp%c1, %q0, %q3, #%c4"
+  [(set_attr "type" "coproc")]
+)
+
+(define_insn "arm_vcx3qv16qi"
+  [(set (match_operand:V16QI 0 "register_operand" "=t")
+	(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+			  (match_operand:V16QI 2 "register_operand" "t")
+			  (match_operand:V16QI 3 "register_operand" "t")
+			  (match_operand:SI 4 "const_int_mve_cde3_operand" "i")]
+	 UNSPEC_VCDE))]
+  "TARGET_CDE && TARGET_HAVE_MVE"
+  "vcx3\\tp%c1, %q0, %q2, %q3, #%c4"
+  [(set_attr "type" "coproc")]
+)
+
+(define_insn "arm_vcx3qav16qi"
+  [(set (match_operand:V16QI 0 "register_operand" "=t")
+	(unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+			  (match_operand:V16QI 2 "register_operand" "0")
+			  (match_operand:V16QI 3 "register_operand" "t")
+			  (match_operand:V16QI 4 "register_operand" "t")
+			  (match_operand:SI 5 "const_int_mve_cde3_operand" "i")]
+	 UNSPEC_VCDEA))]
+  "TARGET_CDE && TARGET_HAVE_MVE"
+  "vcx3a\\tp%c1, %q0, %q3, %q4, #%c5"
+  [(set_attr "type" "coproc")]
+)
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@ -255,6 +255,18 @@
  (and (match_operand 0 "const_int_operand")
       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_VCDE_CONST_3)")))

+(define_predicate "const_int_mve_cde1_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_MVE_CDE_CONST_1)")))
+
+(define_predicate "const_int_mve_cde2_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_MVE_CDE_CONST_2)")))
+
+(define_predicate "const_int_mve_cde3_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_MVE_CDE_CONST_3)")))
+
 ;; This doesn't have to do much because the constant is already checked
 ;; in the shift_operator predicate.
 (define_predicate "shift_amount_operand"
--- a/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-1.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-1.c
@ -0,0 +1,42 @@
+#include "arm_cde.h"
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_main_cde_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_main_cde_mve_fp } */
+
+/* Ensure the error messages make sense when passing too many/too few arguments
+   to the intrinsic user-facing functions.  */
+uint8x16_t test_invalid_arguments (uint8x16_t n, uint8x16_t m)
+{
+  uint8x16_t accum = __arm_vcx1q_u8 (0, 33, 1);   /* { dg-error {macro "__arm_vcx1q_u8" passed 3 arguments, but takes just 2} } */
+  accum += __arm_vcx1qa (0, accum, 33, 1);        /* { dg-error {macro "__arm_vcx1qa" passed 4 arguments, but takes just 3} } */
+  accum += __arm_vcx2q_u8 (0, n, 33, 1);          /* { dg-error {macro "__arm_vcx2q_u8" passed 4 arguments, but takes just 3} } */
+  accum += __arm_vcx2q (0, n, 33, 1);             /* { dg-error {macro "__arm_vcx2q" passed 4 arguments, but takes just 3} } */
+  accum += __arm_vcx2qa (0, accum, n, 33, 1);     /* { dg-error {macro "__arm_vcx2qa" passed 5 arguments, but takes just 4} } */
+  accum += __arm_vcx3q_u8 (0, n, m, 33, 1);       /* { dg-error {macro "__arm_vcx3q_u8" passed 5 arguments, but takes just 4} } */
+  accum += __arm_vcx3q (0, n, m, 33, 1);          /* { dg-error {macro "__arm_vcx3q" passed 5 arguments, but takes just 4} } */
+  accum += __arm_vcx3qa (0, accum, n, m, 33, 1);  /* { dg-error {macro "__arm_vcx3qa" passed 6 arguments, but takes just 5} } */
+  accum += __arm_vcx1q_u8 (0);                    /* { dg-error {macro "__arm_vcx1q_u8" requires 2 arguments, but only 1 given} } */
+  accum += __arm_vcx1qa (0, accum);               /* { dg-error {macro "__arm_vcx1qa" requires 3 arguments, but only 2 given} } */
+  accum += __arm_vcx2q_u8 (0, n);                 /* { dg-error {macro "__arm_vcx2q_u8" requires 3 arguments, but only 2 given} } */
+  accum += __arm_vcx2q (0, n);                    /* { dg-error {macro "__arm_vcx2q" requires 3 arguments, but only 2 given} } */
+  accum += __arm_vcx2qa (0, accum, n);            /* { dg-error {macro "__arm_vcx2qa" requires 4 arguments, but only 3 given} } */
+  accum += __arm_vcx3q_u8 (0, n, m);              /* { dg-error {macro "__arm_vcx3q_u8" requires 4 arguments, but only 3 given} } */
+  accum += __arm_vcx3q (0, n, m);                 /* { dg-error {macro "__arm_vcx3q" requires 4 arguments, but only 3 given} } */
+  accum += __arm_vcx3qa (0, accum, n, m);         /* { dg-error {macro "__arm_vcx3qa" requires 5 arguments, but only 4 given} } */
+
+  /* The preprocessor complains that the macro was given an invalid number of
+     arguments, and because of that ends up not expanding the macro but
+     rather just leaving the macro name in the source code.  That macro name
+     results in these errors.  */
+  /* { dg-error {'__arm_vcx1q_u8' undeclared \(first use in this function\)}  "" { target { *-*-* } } 11 } */
+  /* { dg-error {'__arm_vcx1qa' undeclared \(first use in this function\)}  "" { target { *-*-* } } 12 } */
+  /* { dg-error {'__arm_vcx2q_u8' undeclared \(first use in this function\)}  "" { target { *-*-* } } 13 } */
+  /* { dg-error {'__arm_vcx2q' undeclared \(first use in this function\)}  "" { target { *-*-* } } 14 } */
+  /* { dg-error {'__arm_vcx2qa' undeclared \(first use in this function\)}  "" { target { *-*-* } } 15 } */
+  /* { dg-error {'__arm_vcx3q_u8' undeclared \(first use in this function\)}  "" { target { *-*-* } } 16 } */
+  /* { dg-error {'__arm_vcx3q' undeclared \(first use in this function\)}  "" { target { *-*-* } } 17 } */
+  /* { dg-error {'__arm_vcx3qa' undeclared \(first use in this function\)}  "" { target { *-*-* } } 18 } */
+
+  return accum;
+}
--- a/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-2.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-2.c
@ -0,0 +1,103 @@
+#include "arm_cde.h"
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_main_cde_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_main_cde_mve_fp } */
+
+/* This file and cde-mve-error-tests.c are split since there are two kinds of
+   errors happening here.  The errors in the other file cause the compiler to
+   not reach the errors found here, hence they need to be in a different file
+   so we can inspect these ones.  */
+
+uint8x16_t test_bad_immediates (uint8x16_t n, uint8x16_t m, int someval)
+{
+  uint8x16_t accum = (uint8x16_t)(uint32x4_t){0, 0, 0, 0};
+
+  /* We always different constants for the pairs (__arm_vcx2q and
+     __arm_vcx2q_u8) and (__arm_vcx3q and __arm_vcx3q_u8) despite them mapping
+     to the same builtin and us wanting to test the same thing in each block.
+
+     This is because we have told the compiler that these functions are
+     constant and pure (i.e. produce a value solely based on their arguments
+     and have no side-effects).
+
+     With that information the compiler eliminates duplicate calls to the
+     functions, and we only get error messages for one of the pairs.
+     Hence, in order to get error messages for both function calls, we use
+     different constants.  */
+
+  /* `coproc' not enabled.  */
+  accum += __arm_vcx1q_u8 (1, 4095);           /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
+  accum += __arm_vcx1qa (1, accum, 4095);      /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
+  accum += __arm_vcx2q (1, n, 126);            /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
+  accum += __arm_vcx2q_u8 (1, n, 127);         /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
+  accum += __arm_vcx2qa (1, accum, n, 127);    /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
+  accum += __arm_vcx3q_u8 (1, n, m, 14);       /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
+  accum += __arm_vcx3q (1, n, m, 15);          /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
+  accum += __arm_vcx3qa (1, accum, n, m, 15);  /* { dg-error {coprocessor 1 is not enabled with \+cdecp1} } */
+
+  /* `coproc' out of range.  */
+  accum += __arm_vcx1q_u8 (8, 4095);           /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx1qa (8, accum, 4095);      /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx2q (8, n, 126);            /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx2q_u8 (8, n, 127);         /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx2qa (8, accum, n, 127);    /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx3q_u8 (8, n, m, 14);       /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx3q (8, n, m, 15);          /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx3qa (8, accum, n, m, 15);  /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+
+  /* `imm' out of range.  */
+  accum += __arm_vcx1q_u8 (0, 4096);           /* { dg-error {argument 2 must be a constant immediate in range \[0-4095\]} } */
+  accum += __arm_vcx1qa (0, accum, 4096);      /* { dg-error {argument 3 must be a constant immediate in range \[0-4095\]} } */
+  accum += __arm_vcx2q (0, n, 128);            /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
+  accum += __arm_vcx2q_u8 (0, n, 129);         /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
+  accum += __arm_vcx2qa (0, accum, n, 128);    /* { dg-error {argument 4 must be a constant immediate in range \[0-127\]} } */
+  accum += __arm_vcx3q_u8 (0, n, m, 16);       /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
+  accum += __arm_vcx3q (0, n, m, 17);          /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
+  accum += __arm_vcx3qa (0, accum, n, m, 16);  /* { dg-error {argument 5 must be a constant immediate in range \[0-15\]} } */
+
+  /* `imm' is not an immediate.  */
+  accum += __arm_vcx1q_u8 (0, someval);             /* { dg-error {argument 2 must be a constant immediate in range \[0-4095\]} } */
+  accum += __arm_vcx1qa (0, accum, someval);        /* { dg-error {argument 3 must be a constant immediate in range \[0-4095\]} } */
+  accum += __arm_vcx2q (0, n, someval);             /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
+  accum += __arm_vcx2q_u8 (6, n, someval);          /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
+  accum += __arm_vcx2qa (0, accum, n, someval);     /* { dg-error {argument 4 must be a constant immediate in range \[0-127\]} } */
+  accum += __arm_vcx3q_u8 (0, n, m, someval);       /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
+  accum += __arm_vcx3q (6, n, m, someval);          /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
+  accum += __arm_vcx3qa (0, accum, n, m, someval);  /* { dg-error {argument 5 must be a constant immediate in range \[0-15\]} } */
+
+  /* `coproc' is not an immediate.  */
+  accum += __arm_vcx1q_u8 (someval, 4095);           /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx1qa (someval, accum, 4095);      /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx2q (someval, n, 126);            /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx2q_u8 (someval, n, 127);         /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx2qa (someval, accum, n, 127);    /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx3q_u8 (someval, n, m, 14);       /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx3q (someval, n, m, 15);          /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx3qa (someval, accum, n, m, 15);  /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+
+  /* `imm' is of wrong type.  */
+  accum += __arm_vcx1q_u8 (0, "");           /* { dg-error {argument 2 must be a constant immediate in range \[0-4095\]} } */
+  accum += __arm_vcx1qa (0, accum, "");      /* { dg-error {argument 3 must be a constant immediate in range \[0-4095\]} } */
+  accum += __arm_vcx2q (0, n, "");            /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
+  accum += __arm_vcx2q_u8 (0, n, "x");         /* { dg-error {argument 3 must be a constant immediate in range \[0-127\]} } */
+  accum += __arm_vcx2qa (0, accum, n, "");    /* { dg-error {argument 4 must be a constant immediate in range \[0-127\]} } */
+  accum += __arm_vcx3q_u8 (0, n, m, "");       /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
+  accum += __arm_vcx3q (0, n, m, "x");          /* { dg-error {argument 4 must be a constant immediate in range \[0-15\]} } */
+  accum += __arm_vcx3qa (0, accum, n, m, "");  /* { dg-error {argument 5 must be a constant immediate in range \[0-15\]} } */
+
+  /* `coproc' is of wrong type.  */
+  accum += __arm_vcx1q_u8 ("", 4095);           /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx1qa ("", accum, 4095);      /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx2q ("", n, 126);            /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx2q_u8 ("", n, 127);         /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx2qa ("", accum, n, 127);    /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx3q_u8 ("", n, m, 14);       /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx3q ("", n, m, 15);          /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_vcx3qa ("", accum, n, m, 15);  /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+
+  /* { dg-warning {passing argument 2 of '__builtin_arm_vcx1qv16qi' makes integer from pointer without a cast \[-Wint-conversion\]} "" { target *-*-* } 80 } */
+  /* { dg-warning {passing argument 1 of '__builtin_arm_vcx1qv16qi' makes integer from pointer without a cast \[-Wint-conversion\]} "" { target *-*-* } 90 } */
+
+  return accum;
+}
--- a/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-3.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-3.c
@ -0,0 +1,85 @@
+#include "arm_cde.h"
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_main_cde_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_main_cde_mve_fp } */
+
+uint32_t test (int8x16_t m, int8x16_t n)
+{
+  /* Bad types for polymophic arguments.  */
+  uint32_t accum = 0, n_int = 0;
+  accum += __arm_vcx1qa (0, accum, 4095);
+  accum += __arm_vcx2q (0, n_int, 126);
+  accum += __arm_vcx2q_u8 (0, n_int, 127);
+  accum += __arm_vcx2qa (0, accum, n, 127);
+  accum += __arm_vcx3q_u8 (0, n_int, m, 14);
+  accum += __arm_vcx3q (0, n_int, m, 15);
+  accum += __arm_vcx3qa (0, accum, n, m, 15);
+
+  /* { dg-error {argument 1 to function '__builtin_arm_vcx1qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 11 } */
+  /* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx1qav16qi'} "" { target *-*-* } 11 } */
+  /* { dg-error {argument 1 to function '__builtin_arm_vcx2qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 12 } */
+  /* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx2qv16qi'} "" { target *-*-* } 12 } */
+  /* { dg-error {argument 1 to function '__builtin_arm_vcx2qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 13 } */
+  /* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx2qv16qi'} "" { target *-*-* } 13 } */
+  /* { dg-error {argument 1 to function '__builtin_arm_vcx2qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 14 } */
+  /* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx2qav16qi'} "" { target *-*-* } 14 } */
+  /* { dg-error {argument 1 to function '__builtin_arm_vcx3qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 15 } */
+  /* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx3qv16qi'} "" { target *-*-* } 15 } */
+  /* { dg-error {argument 1 to function '__builtin_arm_vcx3qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 16 } */
+  /* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx3qv16qi'} "" { target *-*-* } 16 } */
+  /* { dg-error {argument 1 to function '__builtin_arm_vcx3qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 17 } */
+  /* { dg-error {incompatible type for argument 2 of '__builtin_arm_vcx3qav16qi'} "" { target *-*-* } 17 } */
+  return accum;
+}
+
+int8x16_t test2 (int8x16_t m, int8x16_t n)
+{
+  uint32_t n_int = 0, m_int = 0;
+  int8x16_t accum = (int8x16_t)(uint64x2_t) { 0, 0 };
+  accum += __arm_vcx2qa (0, accum, n_int, 127);
+  accum += __arm_vcx3q_u8 (0, n, m_int, 14);
+  accum += __arm_vcx3q (0, n, m_int, 15);
+  accum += __arm_vcx3qa (0, accum, n_int, m, 15);
+  accum += __arm_vcx3qa (0, accum, n_int, m, 15);
+  accum += __arm_vcx3qa (0, accum, n, m_int, 15);
+  accum += __arm_vcx3qa (0, accum, n, m_int, 15);
+
+  /* { dg-error {argument 2 to function '__builtin_arm_vcx2qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 40 } */
+  /* { dg-error {incompatible type for argument 3 of '__builtin_arm_vcx2qav16qi'} "" { target *-*-* } 40 } */
+  /* { dg-error {argument 2 to function '__builtin_arm_vcx3qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 41 } */
+  /* { dg-error {incompatible type for argument 3 of '__builtin_arm_vcx3qv16qi'} "" { target *-*-* } 41 } */
+  /* { dg-error {argument 2 to function '__builtin_arm_vcx3qv16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 42 } */
+  /* { dg-error {incompatible type for argument 3 of '__builtin_arm_vcx3qv16qi'} "" { target *-*-* } 42 } */
+  /* { dg-error {argument 2 to function '__builtin_arm_vcx3qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 43 } */
+  /* { dg-error {incompatible type for argument 3 of '__builtin_arm_vcx3qav16qi'} "" { target *-*-* } 43 } */
+  /* { dg-error {argument 2 to function '__builtin_arm_vcx3qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 44 } */
+  /* { dg-error {incompatible type for argument 3 of '__builtin_arm_vcx3qav16qi'} "" { target *-*-* } 44 } */
+  /* { dg-error {argument 3 to function '__builtin_arm_vcx3qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 45 } */
+  /* { dg-error {incompatible type for argument 4 of '__builtin_arm_vcx3qav16qi'} "" { target *-*-* } 45 } */
+  /* { dg-error {argument 3 to function '__builtin_arm_vcx3qav16qi' is of type 'uint32_t' {aka '(?:long )?unsigned int'} which is not known to be 128 bits wide} "" { target *-*-* } 46 } */
+  /* { dg-error {incompatible type for argument 4 of '__builtin_arm_vcx3qav16qi'} "" { target *-*-* } 46 } */
+  return accum;
+}
+
+/* Testing that undeclared variables work as expected.
+   (This to verify we fixed a problem hit during development).  */
+int8x16_t test3 (int8x16_t m, int8x16_t n)
+{
+  int8x16_t accum = (int8x16_t)(uint64x2_t) { 0, 0 };
+  accum += __arm_vcx1qa (0, accum_int, 4095);
+  accum += __arm_vcx2q (0, n_int, 126);
+  accum += __arm_vcx2q_u8 (0, n_int, 127);
+  accum += __arm_vcx2qa (0, accum, n_int, 127);
+  accum += __arm_vcx3q_u8 (0, n_int, m, 14);
+  accum += __arm_vcx3q_u8 (0, n, m_int, 14);
+  accum += __arm_vcx3q (0, n_int, m, 15);
+  accum += __arm_vcx3q (0, n, m_int, 15);
+  accum += __arm_vcx3qa (0, accum, n_int, m, 15);
+  accum += __arm_vcx3qa (0, accum, n_int, m_int, 15);
+
+  /* { dg-error {'accum_int' undeclared \(first use in this function\)} "" { target *-*-* } 70 } */
+  /* { dg-error {'n_int' undeclared \(first use in this function\)} "" { target *-*-* } 71 } */
+  /* { dg-error {'m_int' undeclared \(first use in this function\)} "" { target *-*-* } 75 } */
+  return accum;
+}
--- a/gcc/testsuite/gcc.target/arm/acle/cde-mve-full-assembly.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-mve-full-assembly.c
@ -0,0 +1,557 @@
+/* { dg-do compile } */
+/* { dg-skip-if "Require optimisation to compile DCE tests" { *-*-* } { "-O0" "-mfloat-abi=softfp" } { "" } } */
+/* { dg-require-effective-target arm_v8_1m_main_cde_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_main_cde_mve_fp } */
+/* We use -ffast-math so that the addition of 0.0 to a value is assumed to not
+   change the value.  This means the tests for float types can use the same
+   trick of adding to a value initialised to zero to check whether the RTL
+   patterns correctly mark that the incoming value is not used.  */
+/* { dg-additional-options "-ffast-math" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "cde-mve-tests.c"
+
+/* NOTE:
+     We avoid matching the functions returning a __builtin_neon_ti value since
+     there are variations between processors that make matching the whole
+     function difficult.
+     Since moving a TImode value into an MVE 'Q' register takes a few
+     temporaries, this leaves many instructions which can end up being
+     scheduled in different ways.  Matching the ways this ends up getting
+     scheduled and restructured is awkward, and the extra tests for this one
+     data type don't seem to be worth the confusing testcases.  */
+
+/*
+** test_cde_vcx1q_u8float16x8_tintint:
+** 	vcx1	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1q_u8float32x4_tintint:
+** 	vcx1	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1q_u8uint8x16_tintint:
+** 	vcx1	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1q_u8uint16x8_tintint:
+** 	vcx1	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1q_u8uint32x4_tintint:
+** 	vcx1	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1q_u8uint64x2_tintint:
+** 	vcx1	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1q_u8int8x16_tintint:
+** 	vcx1	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1q_u8int16x8_tintint:
+** 	vcx1	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1q_u8int32x4_tintint:
+** 	vcx1	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1q_u8int64x2_tintint:
+** 	vcx1	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1qafloat16x8_tintint:
+** 	vldr\.64	d0, \.L([0-9]*)
+** 	vldr\.64	d1, \.L\1\+8
+** 	vcx1a	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1qafloat32x4_tintint:
+** 	vldr\.64	d0, \.L([0-9]*)
+** 	vldr\.64	d1, \.L\1\+8
+** 	vcx1a	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1qauint8x16_tintint:
+** 	vldr\.64	d0, \.L([0-9]*)
+** 	vldr\.64	d1, \.L\1\+8
+** 	vcx1a	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1qauint16x8_tintint:
+** 	vldr\.64	d0, \.L([0-9]*)
+** 	vldr\.64	d1, \.L\1\+8
+** 	vcx1a	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1qauint32x4_tintint:
+** 	vldr\.64	d0, \.L([0-9]*)
+** 	vldr\.64	d1, \.L\1\+8
+** 	vcx1a	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1qauint64x2_tintint:
+** 	vldr\.64	d0, \.L([0-9]*)
+** 	vldr\.64	d1, \.L\1\+8
+** 	vcx1a	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1qaint8x16_tintint:
+** 	vldr\.64	d0, \.L([0-9]*)
+** 	vldr\.64	d1, \.L\1\+8
+** 	vcx1a	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1qaint16x8_tintint:
+** 	vldr\.64	d0, \.L([0-9]*)
+** 	vldr\.64	d1, \.L\1\+8
+** 	vcx1a	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1qaint32x4_tintint:
+** 	vldr\.64	d0, \.L([0-9]*)
+** 	vldr\.64	d1, \.L\1\+8
+** 	vcx1a	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx1qaint64x2_tintint:
+** 	vldr\.64	d0, \.L([0-9]*)
+** 	vldr\.64	d1, \.L\1\+8
+** 	vcx1a	p0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2q_u8float16x8_tuint16x8_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2q_u8float16x8_tfloat32x4_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2q_u8float32x4_tuint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2q_u8int64x2_tuint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2q_u8int8x16_tuint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2q_u8uint16x8_tuint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2q_u8uint8x16_tint64x2_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2q_u8uint8x16_tint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2q_u8uint8x16_tuint16x8_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2q_u8uint8x16_tuint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qfloat16x8_tuint16x8_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qfloat16x8_tfloat32x4_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qfloat32x4_tuint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qint64x2_tuint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qint8x16_tuint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2quint16x8_tuint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2quint8x16_tint64x2_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2quint8x16_tint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2quint8x16_tuint16x8_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2quint8x16_tuint8x16_tint:
+** 	vcx2	p0, q0, q0, #33
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qafloat16x8_tuint16x8_tint:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx2a	p0, (q[0-7]), q0, #33
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qafloat16x8_tfloat32x4_tint:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx2a	p0, (q[0-7]), q0, #33
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qafloat32x4_tuint8x16_tint:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx2a	p0, (q[0-7]), q0, #33
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qaint64x2_tuint8x16_tint:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx2a	p0, (q[0-7]), q0, #33
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qaint8x16_tuint8x16_tint:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx2a	p0, (q[0-7]), q0, #33
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qauint16x8_tuint8x16_tint:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx2a	p0, (q[0-7]), q0, #33
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qauint8x16_tint64x2_tint:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx2a	p0, (q[0-7]), q0, #33
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qauint8x16_tint8x16_tint:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx2a	p0, (q[0-7]), q0, #33
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qauint8x16_tuint16x8_tint:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx2a	p0, (q[0-7]), q0, #33
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx2qauint8x16_tuint8x16_tint:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx2a	p0, (q[0-7]), q0, #33
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tuint8x16_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8uint16x8_tuint8x16_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tuint16x8_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tuint8x16_tuint16x8_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8float16x8_tfloat16x8_tfloat16x8_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8float32x4_tuint64x2_tfloat16x8_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8int8x16_tuint8x16_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tint8x16_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tuint8x16_tint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8int64x2_tuint8x16_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tint64x2_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tuint8x16_tint64x2_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tint64x2_tint64x2_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3quint8x16_tuint8x16_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qfloat16x8_tfloat16x8_tfloat16x8_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qfloat32x4_tuint64x2_tfloat16x8_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3quint16x8_tuint8x16_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3quint8x16_tuint16x8_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3quint8x16_tuint8x16_tuint16x8_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qint8x16_tuint8x16_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3quint8x16_tint8x16_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3quint8x16_tuint8x16_tint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qint64x2_tuint8x16_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3quint8x16_tint64x2_tuint8x16_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3quint8x16_tuint8x16_tint64x2_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3quint8x16_tint64x2_tint64x2_t:
+** 	vcx3	p0, q0, q0, q1, #12
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qauint8x16_tuint8x16_tuint8x16_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qafloat16x8_tfloat16x8_tfloat16x8_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qafloat32x4_tuint64x2_tfloat16x8_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qauint16x8_tuint8x16_tuint8x16_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qauint8x16_tuint16x8_tuint8x16_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qauint8x16_tuint8x16_tuint16x8_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qaint8x16_tuint8x16_tuint8x16_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qauint8x16_tint8x16_tuint8x16_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qauint8x16_tuint8x16_tint8x16_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qaint64x2_tuint8x16_tuint8x16_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qauint8x16_tint64x2_tuint8x16_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qauint8x16_tuint8x16_tint64x2_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
+/*
+** test_cde_vcx3qauint8x16_tint64x2_tint64x2_t:
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L([0-9]*)
+** 	vldr\.64	d(?:[01][0-4]|[0-9]), \.L\1\+8
+** 	vcx3a	p0, (q[0-7]), q0, q1, #12
+** 	vmov	q0, \2
+** 	bx	lr
+*/
--- a/gcc/testsuite/gcc.target/arm/acle/cde-mve-tests.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-mve-tests.c
@ -0,0 +1,722 @@
+#include "arm_cde.h"
+
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_main_cde_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_main_cde_mve_fp } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+
+/* Test that the assembly is produced as expected.
+   Test that the same thing happens for each valid type.
+     (ensure we check *every* valid type, though we're not bothering with every
+     type combination, just checking "all same type" and "different types",
+     also want to check every valid type at least once)  */
+
+#define TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, accum_type, n_type, m_type, arguments) \
+  accum_type test_cde_##name##accum_type##n_type##m_type ( \
+		      __attribute__ ((unused)) n_type n, \
+		      __attribute__ ((unused)) m_type m) \
+  {   \
+    accum_type accum = (accum_type)(uint32x4_t){0,0,0,0};  \
+    accum += (accum_type) __arm_##name arguments; \
+    return accum; \
+  }
+
+/* Use every valid type for the output -- demonstrate can use any 128 bit value
+   (which is a requirement for these intrinsics).  */
+#define TEST_CDE_MVE_INTRINSIC_1(name, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, __builtin_neon_ti, int, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float16x8_t, int, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float32x4_t, int, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint16x8_t, int, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint32x4_t, int, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint64x2_t, int, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int8x16_t, int, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int16x8_t, int, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int32x4_t, int, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int64x2_t, int, int, arguments) \
+
+#define TEST_CDE_MVE_INTRINSIC_2(name, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, __builtin_neon_ti, uint8x16_t, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, __builtin_neon_ti, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float16x8_t, uint16x8_t, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float16x8_t, float32x4_t, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float32x4_t, uint8x16_t, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int64x2_t, uint8x16_t, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int8x16_t, uint8x16_t, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint16x8_t, uint8x16_t, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int64x2_t, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int8x16_t, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint16x8_t, int, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, int, arguments)
+
+#define TEST_CDE_MVE_INTRINSIC_3(name, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, __builtin_neon_ti, uint8x16_t, uint8x16_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, __builtin_neon_ti, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, uint8x16_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float16x8_t, float16x8_t, float16x8_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, float32x4_t, uint64x2_t, float16x8_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint16x8_t, uint8x16_t, uint8x16_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint16x8_t, uint8x16_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, uint16x8_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int8x16_t, uint8x16_t, uint8x16_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int8x16_t, uint8x16_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, int8x16_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, int64x2_t, uint8x16_t, uint8x16_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int64x2_t, uint8x16_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, uint8x16_t, int64x2_t, arguments) \
+  TEST_CDE_MVE_INTRINSIC_SPECIFIED_TYPE(name, uint8x16_t, int64x2_t, int64x2_t, arguments)
+
+TEST_CDE_MVE_INTRINSIC_1(vcx1q_u8, (0, 33))
+TEST_CDE_MVE_INTRINSIC_1(vcx1qa, (0, accum, 33))
+
+TEST_CDE_MVE_INTRINSIC_2(vcx2q_u8, (0, n, 33))
+TEST_CDE_MVE_INTRINSIC_2(vcx2q, (0, n, 33))
+TEST_CDE_MVE_INTRINSIC_2(vcx2qa, (0, accum, n, 33))
+
+TEST_CDE_MVE_INTRINSIC_3(vcx3q_u8, (0, n, m, 12))
+TEST_CDE_MVE_INTRINSIC_3(vcx3q, (0, n, m, 12))
+TEST_CDE_MVE_INTRINSIC_3(vcx3qa, (0, accum, n, m, 12))
+
+/* This testcase checks that in all compilations this C code produces the
+   expected CDE instructions from the above intrinsics.
+
+   Here we check that there are the expected number of `vcx*` occurences, and
+   that each function has the expected form in it.
+
+   Another testcase (cde-mve-full-assembly.c) checks that when using
+   `-mfloat-abi=hard` and when compiled with an FPU the above C code produces
+   code that demonstrates the compiler knows that the intrinsics are constant
+   and pure, and that demonstrates the compiler generates sane code from them.
+   That testcase needs these special arguments so it can ignore things like
+   accounting for the soft float ABI or leftovers from temporaries that are
+   later removed when generating code for a target with Floating Point
+   registers but without an FPU.  */
+
+/* { dg-final { scan-assembler-times "\tvcx1\t" 11 } } */
+/* { dg-final { scan-assembler-times "\tvcx1a\t" 11 } } */
+/* { dg-final { scan-assembler-times "\tvcx2\t" 24 } } */
+/* { dg-final { scan-assembler-times "\tvcx2a\t" 12 } } */
+/* { dg-final { scan-assembler-times "\tvcx3\t" 30 } } */
+/* { dg-final { scan-assembler-times "\tvcx3a\t" 15 } } */
+
+/*
+** test_cde_vcx1q_u8__builtin_neon_tiintint:
+** 	...
+** 	vcx1	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1q_u8float16x8_tintint:
+** 	...
+** 	vcx1	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1q_u8float32x4_tintint:
+** 	...
+** 	vcx1	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1q_u8uint8x16_tintint:
+** 	...
+** 	vcx1	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1q_u8uint16x8_tintint:
+** 	...
+** 	vcx1	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1q_u8uint32x4_tintint:
+** 	...
+** 	vcx1	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1q_u8uint64x2_tintint:
+** 	...
+** 	vcx1	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1q_u8int8x16_tintint:
+** 	...
+** 	vcx1	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1q_u8int16x8_tintint:
+** 	...
+** 	vcx1	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1q_u8int32x4_tintint:
+** 	...
+** 	vcx1	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1q_u8int64x2_tintint:
+** 	...
+** 	vcx1	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1qa__builtin_neon_tiintint:
+** 	...
+** 	vcx1a	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1qafloat16x8_tintint:
+** 	...
+** 	vcx1a	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1qafloat32x4_tintint:
+** 	...
+** 	vcx1a	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1qauint8x16_tintint:
+** 	...
+** 	vcx1a	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1qauint16x8_tintint:
+** 	...
+** 	vcx1a	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1qauint32x4_tintint:
+** 	...
+** 	vcx1a	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1qauint64x2_tintint:
+** 	...
+** 	vcx1a	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1qaint8x16_tintint:
+** 	...
+** 	vcx1a	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1qaint16x8_tintint:
+** 	...
+** 	vcx1a	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1qaint32x4_tintint:
+** 	...
+** 	vcx1a	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx1qaint64x2_tintint:
+** 	...
+** 	vcx1a	p0, q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8__builtin_neon_tiuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8uint8x16_t__builtin_neon_tiint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8float16x8_tuint16x8_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8float16x8_tfloat32x4_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8float32x4_tuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8int64x2_tuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8int8x16_tuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8uint16x8_tuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8uint8x16_tint64x2_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8uint8x16_tint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8uint8x16_tuint16x8_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q_u8uint8x16_tuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2q__builtin_neon_tiuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2quint8x16_t__builtin_neon_tiint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qfloat16x8_tuint16x8_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qfloat16x8_tfloat32x4_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qfloat32x4_tuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qint64x2_tuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qint8x16_tuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2quint16x8_tuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2quint8x16_tint64x2_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2quint8x16_tint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2quint8x16_tuint16x8_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2quint8x16_tuint8x16_tint:
+** 	...
+** 	vcx2	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qa__builtin_neon_tiuint8x16_tint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qauint8x16_t__builtin_neon_tiint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qafloat16x8_tuint16x8_tint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qafloat16x8_tfloat32x4_tint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qafloat32x4_tuint8x16_tint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qaint64x2_tuint8x16_tint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qaint8x16_tuint8x16_tint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qauint16x8_tuint8x16_tint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qauint8x16_tint64x2_tint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qauint8x16_tint8x16_tint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qauint8x16_tuint16x8_tint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx2qauint8x16_tuint8x16_tint:
+** 	...
+** 	vcx2a	p0, q[0-7], q[0-7], #33
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8__builtin_neon_tiuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tuint8x16_t__builtin_neon_ti:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8uint16x8_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tuint16x8_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tuint8x16_tuint16x8_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8float16x8_tfloat16x8_tfloat16x8_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8float32x4_tuint64x2_tfloat16x8_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8int8x16_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tuint8x16_tint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8int64x2_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tint64x2_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tuint8x16_tint64x2_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q_u8uint8x16_tint64x2_tint64x2_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3q__builtin_neon_tiuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3quint8x16_tuint8x16_t__builtin_neon_ti:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3quint8x16_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qfloat16x8_tfloat16x8_tfloat16x8_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qfloat32x4_tuint64x2_tfloat16x8_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3quint16x8_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3quint8x16_tuint16x8_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3quint8x16_tuint8x16_tuint16x8_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qint8x16_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3quint8x16_tint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3quint8x16_tuint8x16_tint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qint64x2_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3quint8x16_tint64x2_tuint8x16_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3quint8x16_tuint8x16_tint64x2_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3quint8x16_tint64x2_tint64x2_t:
+** 	...
+** 	vcx3	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qa__builtin_neon_tiuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qauint8x16_tuint8x16_t__builtin_neon_ti:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qauint8x16_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qafloat16x8_tfloat16x8_tfloat16x8_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qafloat32x4_tuint64x2_tfloat16x8_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qauint16x8_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qauint8x16_tuint16x8_tuint8x16_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qauint8x16_tuint8x16_tuint16x8_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qaint8x16_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qauint8x16_tint8x16_tuint8x16_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qauint8x16_tuint8x16_tint8x16_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qaint64x2_tuint8x16_tuint8x16_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qauint8x16_tint64x2_tuint8x16_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qauint8x16_tuint8x16_tint64x2_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+/*
+** test_cde_vcx3qauint8x16_tint64x2_tint64x2_t:
+** 	...
+** 	vcx3a	p0, q[0-7], q[0-7], q[0-7], #12
+** 	...
+*/
+
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@ -5120,11 +5120,14 @@ foreach { armfunc armflag armdef } {
 		"-march=armv8-m.main+cdecp0+cdecp6 -mthumb"
 		"defined (__ARM_FEATURE_CDE)"
 	arm_v8m_main_cde_fp
-		"-march=armv8-m.main+fp+cdecp0+cdecp6 -mthumb"
+		"-march=armv8-m.main+fp+cdecp0+cdecp6 -mthumb -mfpu=auto"
 		"defined (__ARM_FEATURE_CDE) && defined (__ARM_FP)"
 	arm_v8_1m_main_cde_mve
-		"-march=armv8.1-m.main+mve+cdecp0+cdecp6 -mthumb"
+		"-march=armv8.1-m.main+mve+cdecp0+cdecp6 -mthumb -mfpu=auto"
 		"defined (__ARM_FEATURE_CDE) && defined (__ARM_FEATURE_MVE)"
+	arm_v8_1m_main_cde_mve_fp
+		"-march=armv8.1-m.main+mve.fp+cdecp0+cdecp6 -mthumb -mfpu=auto"
+		"defined (__ARM_FEATURE_CDE) || __ARM_FEATURE_MVE == 3"
 	} {
    eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
 	proc check_effective_target_FUNC_ok_nocache { } {