mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-18 10:00:35 +08:00
arm: Auto-vectorization for MVE: vmul
This patch enables MVE vmul instructions for auto-vectorization. It includes MVE in expander mul<mode>3 to enable vectorization for MVE. Related MVE vmul insns are modified to support the expander by using expression 'mult' instead of unspec. The mul<mode>3 for vectorization in vec-common.md uses mode iterator VDQWH instead of VALLW to cover all supported modes. The macros ARM_HAVE_<MODE>_ARITH are used to select supported modes for different targets. The redundant mul<mode>3 in neon.md is removed. gcc/ChangeLog: 2020-10-22 Dennis Zhang <dennis.zhang@arm.com> * config/arm/mve.md (mve_vmulq<mode>): New entry for vmul instruction using expression 'mult'. (mve_vmulq_f<mode>): Use mult instead of VMULQ_F. * config/arm/neon.md (mul<mode>3): Removed. * config/arm/vec-common.md (mul<mode>3): Use the new mode macros ARM_HAVE_<MODE>_ARITH. Use mode iterator VDQWH instead of VALLW. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/mve-vmul_1.c: New test.
This commit is contained in:
parent
ca5f4666f7
commit
0f41b5e02f
@ -1,3 +1,12 @@
|
||||
2020-10-22 Dennis Zhang <dennis.zhang@arm.com>
|
||||
|
||||
* config/arm/mve.md (mve_vmulq<mode>): New entry for vmul instruction
|
||||
using expression 'mult'.
|
||||
(mve_vmulq_f<mode>): Use mult instead of VMULQ_F.
|
||||
* config/arm/neon.md (mul<mode>3): Removed.
|
||||
* config/arm/vec-common.md (mul<mode>3): Use the new mode macros
|
||||
ARM_HAVE_<MODE>_ARITH. Use mode iterator VDQWH instead of VALLW.
|
||||
|
||||
2020-10-20 Andrew MacLeod <amacleod@redhat.com>
|
||||
|
||||
PR tree-optimization/97505
|
||||
|
@ -1551,6 +1551,17 @@
|
||||
[(set_attr "type" "mve_move")
|
||||
])
|
||||
|
||||
;; Integer vector multiply written as a plain 'mult' RTL expression rather
;; than an unspec.  Operand 0 (output) and operands 1 and 2 (inputs) are
;; MVE_2-mode registers with constraint "w".  The pattern is enabled under
;; TARGET_HAVE_MVE and emits vmul.i<V_sz_elem> on the three q registers.
(define_insn "mve_vmulq<mode>"
|
||||
[
|
||||
(set (match_operand:MVE_2 0 "s_register_operand" "=w")
|
||||
(mult:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
|
||||
(match_operand:MVE_2 2 "s_register_operand" "w")))
|
||||
]
|
||||
"TARGET_HAVE_MVE"
|
||||
"vmul.i%#<V_sz_elem>\t%q0, %q1, %q2"
|
||||
[(set_attr "type" "mve_move")
|
||||
])
|
||||
|
||||
;;
|
||||
;; [vornq_u, vornq_s])
|
||||
;;
|
||||
@ -2562,9 +2573,8 @@
|
||||
(define_insn "mve_vmulq_f<mode>"
|
||||
[
|
||||
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
|
||||
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
|
||||
(match_operand:MVE_0 2 "s_register_operand" "w")]
|
||||
VMULQ_F))
|
||||
(mult:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
|
||||
(match_operand:MVE_0 2 "s_register_operand" "w")))
|
||||
]
|
||||
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
|
||||
"vmul.f%#<V_sz_elem> %q0, %q1, %q2"
|
||||
|
@ -1732,17 +1732,6 @@
|
||||
(const_string "neon_mul_<V_elem_ch><q>")))]
|
||||
)
|
||||
|
||||
(define_insn "mul<mode>3"
|
||||
[(set
|
||||
(match_operand:VH 0 "s_register_operand" "=w")
|
||||
(mult:VH
|
||||
(match_operand:VH 1 "s_register_operand" "w")
|
||||
(match_operand:VH 2 "s_register_operand" "w")))]
|
||||
"ARM_HAVE_NEON_<MODE>_ARITH"
|
||||
"vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
[(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vmulf<mode>"
|
||||
[(set
|
||||
(match_operand:VH 0 "s_register_operand" "=w")
|
||||
|
@ -101,14 +101,11 @@
|
||||
})
|
||||
|
||||
(define_expand "mul<mode>3"
|
||||
[(set (match_operand:VALLW 0 "s_register_operand")
|
||||
(mult:VALLW (match_operand:VALLW 1 "s_register_operand")
|
||||
(match_operand:VALLW 2 "s_register_operand")))]
|
||||
"(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
|
||||
|| flag_unsafe_math_optimizations))
|
||||
|| (<MODE>mode == V4HImode && TARGET_REALLY_IWMMXT)"
|
||||
{
|
||||
})
|
||||
[(set (match_operand:VDQWH 0 "s_register_operand")
|
||||
(mult:VDQWH (match_operand:VDQWH 1 "s_register_operand")
|
||||
(match_operand:VDQWH 2 "s_register_operand")))]
|
||||
"ARM_HAVE_<MODE>_ARITH"
|
||||
)
|
||||
|
||||
(define_expand "smin<mode>3"
|
||||
[(set (match_operand:VALLW 0 "s_register_operand")
|
||||
|
@ -1,3 +1,7 @@
|
||||
2020-10-22 Dennis Zhang <dennis.zhang@arm.com>
|
||||
|
||||
* gcc.target/arm/simd/mve-vmul_1.c: New test.
|
||||
|
||||
2020-10-20 Jeff Law <law@redhat.com>
|
||||
|
||||
* gcc.dg/Wbuiltin-declaration-mismatch-9.c: Improve pruning of
|
||||
|
64
gcc/testsuite/gcc.target/arm/simd/mve-vmul_1.c
Normal file
64
gcc/testsuite/gcc.target/arm/simd/mve-vmul_1.c
Normal file
@ -0,0 +1,64 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
/* { dg-additional-options "-O3" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Element-wise product of four int32_t values: dest[i] = a[i] * b[i] for
   i = 0..3.  The dg-final directive after the unsigned variant expects two
   vmul.i32 instructions on q registers in total, presumably one from this
   loop and one from test_vmul_i32_u.  */
void test_vmul_i32 (int32_t * dest, int32_t * a, int32_t * b) {
|
||||
int i;
|
||||
for (i=0; i<4; i++) {
|
||||
dest[i] = a[i] * b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Unsigned counterpart of test_vmul_i32: element-wise product of four
   uint32_t values, dest[i] = a[i] * b[i] for i = 0..3.  The dg-final
   directive that follows scans for vmul.i32, the same mnemonic as the
   signed variant.  */
void test_vmul_i32_u (uint32_t * dest, uint32_t * a, uint32_t * b) {
|
||||
int i;
|
||||
for (i=0; i<4; i++) {
|
||||
dest[i] = a[i] * b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vmul\.i32\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */
|
||||
|
||||
/* Element-wise product of eight int16_t values: dest[i] = a[i] * b[i] for
   i = 0..7.  The dg-final directive after the unsigned variant expects two
   vmul.i16 instructions on q registers in total, presumably one from this
   loop and one from test_vmul_i16_u.  */
void test_vmul_i16 (int16_t * dest, int16_t * a, int16_t * b) {
|
||||
int i;
|
||||
for (i=0; i<8; i++) {
|
||||
dest[i] = a[i] * b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Unsigned counterpart of test_vmul_i16: element-wise product of eight
   uint16_t values, dest[i] = a[i] * b[i] for i = 0..7.
   NOTE(review): where int is 32 bits wide the uint16_t operands promote to
   int, so a large product could overflow signed int if this were ever
   executed; the file is marked dg-do compile, so only the generated
   assembly is inspected.  */
void test_vmul_i16_u (uint16_t * dest, uint16_t * a, uint16_t * b) {
|
||||
int i;
|
||||
for (i=0; i<8; i++) {
|
||||
dest[i] = a[i] * b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vmul\.i16\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */
|
||||
|
||||
/* Element-wise product of sixteen int8_t values: dest[i] = a[i] * b[i] for
   i = 0..15.  The dg-final directive after the unsigned variant expects two
   vmul.i8 instructions on q registers in total, presumably one from this
   loop and one from test_vmul_i8_u.  */
void test_vmul_i8 (int8_t * dest, int8_t * a, int8_t * b) {
|
||||
int i;
|
||||
for (i=0; i<16; i++) {
|
||||
dest[i] = a[i] * b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Unsigned counterpart of test_vmul_i8: element-wise product of sixteen
   uint8_t values, dest[i] = a[i] * b[i] for i = 0..15.  The dg-final
   directive that follows scans for vmul.i8, the same mnemonic as the signed
   variant.  */
void test_vmul_i8_u (uint8_t * dest, uint8_t * a, uint8_t * b) {
|
||||
int i;
|
||||
for (i=0; i<16; i++) {
|
||||
dest[i] = a[i] * b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vmul\.i8\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */
|
||||
|
||||
/* Element-wise product of four float values: dest[i] = a[i] * b[i] for
   i = 0..3.  The dg-final directive that follows expects exactly one
   vmul.f32 instruction on q registers.  */
void test_vmul_f32 (float * dest, float * a, float * b) {
|
||||
int i;
|
||||
for (i=0; i<4; i++) {
|
||||
dest[i] = a[i] * b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vmul\.f32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
|
||||
|
Loading…
x
Reference in New Issue
Block a user