aarch64: Add support for unpacked SVE MULH

This patch extends the SMULH and UMULH support to unpacked vectors. The type suffix must be taken from the element size rather than the container size. The main use of these patterns is to support division and modulus by a constant. The conditional forms would be hard to trigger from non-ACLE code, and ACLE code needs fully-packed vectors only. gcc/ * config/aarch64/aarch64-sve.md (<su>mul<mode>3_highpart) (@aarch64_pred_<MUL_HIGHPART:optab><mode>): Extend from SVE_FULL_I to SVE_I. gcc/testsuite/ * gcc.target/aarch64/sve/mul_highpart_3.c: New test.
2025-04-16 13:11:24 +08:00 · 2021-01-11 18:03:24 +00:00 · 2021-01-11 18:03:24 +00:00 · 7446de5a2a
commit 7446de5a2a
parent 907ea37955
2 changed files with 44 additions and 10 deletions
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@ -4192,12 +4192,12 @@

 ;; Unpredicated highpart multiplication.
 (define_expand "<su>mul<mode>3_highpart"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-	(unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+	(unspec:SVE_I
 	  [(match_dup 3)
-	   (unspec:SVE_FULL_I
-	     [(match_operand:SVE_FULL_I 1 "register_operand")
-	      (match_operand:SVE_FULL_I 2 "register_operand")]
+	   (unspec:SVE_I
+	     [(match_operand:SVE_I 1 "register_operand")
+	      (match_operand:SVE_I 2 "register_operand")]
 	     MUL_HIGHPART)]
 	  UNSPEC_PRED_X))]
  "TARGET_SVE"
@ -4208,12 +4208,12 @@

 ;; Predicated highpart multiplication.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-	(unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+	(unspec:SVE_I
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-	   (unspec:SVE_FULL_I
-	     [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
-	      (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
+	   (unspec:SVE_I
+	     [(match_operand:SVE_I 2 "register_operand" "%0, w")
+	      (match_operand:SVE_I 3 "register_operand" "w, w")]
 	     MUL_HIGHPART)]
 	  UNSPEC_PRED_X))]
  "TARGET_SVE"
--- a/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_3.c
@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_OP(TYPE) \
+  TYPE test##_##TYPE##_reg (TYPE a, TYPE b) { return a % 17; }
+
+#define TEST_TYPE(TYPE, SIZE) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE)
+
+TEST_TYPE (int8_t, 32)
+TEST_TYPE (uint8_t, 32)
+
+TEST_TYPE (int8_t, 64)
+TEST_TYPE (uint8_t, 64)
+TEST_TYPE (int16_t, 64)
+TEST_TYPE (uint16_t, 64)
+
+TEST_TYPE (int8_t, 128)
+TEST_TYPE (uint8_t, 128)
+TEST_TYPE (int16_t, 128)
+TEST_TYPE (uint16_t, 128)
+TEST_TYPE (int32_t, 128)
+TEST_TYPE (uint32_t, 128)
+
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */