mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-20 09:20:42 +08:00
Add support for vectorized fma.
2012-09-11 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Matthew Gretton-Dann <matthew.gretton-dann@arm.com> * config/arm/neon.md (fma<VCVTF:mode>4): New pattern. (*fmsub<VCVTF:mode>4): Likewise. * doc/sourcebuild.texi (arm_neon_v2_ok, arm_neon_v2_hw): Document it. 2012-09-11 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Matthew Gretton-Dann <matthew.gretton-dann@arm.com> * gcc.target/arm/neon-vfma-1.c: New testcase. * gcc.target/arm/neon-vfms-1.c: Likewise. * gcc.target/arm/neon-vmla-1.c: Update test to use int instead of float. * gcc.target/arm/neon-vmls-1.c: Likewise. * lib/target-supports.exp (add_options_for_arm_neonv2): New function. (check_effective_target_arm_neonv2_ok_nocache): Likewise. (check_effective_target_arm_neonv2_ok): Likewise. (check_effective_target_arm_neonv2_hw): Likewise. (check_effective_target_arm_neonv2): Likewise. Co-Authored-By: Matthew Gretton-Dann <matthew.gretton-dann@arm.com> From-SVN: r191180
This commit is contained in:
parent
e0a6637cbd
commit
8b2ab9cbe4
@ -1,3 +1,10 @@
|
||||
2012-09-11 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
|
||||
Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
|
||||
|
||||
* config/arm/neon.md (fma<VCVTF:mode>4): New pattern.
|
||||
(*fmsub<VCVTF:mode>4): Likewise.
|
||||
* doc/sourcebuild.texi (arm_neon_v2_ok, arm_neon_v2_hw): Document it.
|
||||
|
||||
2012-09-11 Aldy Hernandez <aldyh@redhat.com>
|
||||
|
||||
PR middle-end/54149
|
||||
|
@ -707,6 +707,33 @@
|
||||
(const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
|
||||
)
|
||||
|
||||
;; Fused multiply-accumulate
|
||||
(define_insn "fma<VCVTF:mode>4"
|
||||
[(set (match_operand:VCVTF 0 "register_operand" "=w")
|
||||
(fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
|
||||
(match_operand:VCVTF 2 "register_operand" "w")
|
||||
(match_operand:VCVTF 3 "register_operand" "0")))]
|
||||
"TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
|
||||
"vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
[(set (attr "neon_type")
|
||||
(if_then_else (match_test "<Is_d_reg>")
|
||||
(const_string "neon_fp_vmla_ddd")
|
||||
(const_string "neon_fp_vmla_qqq")))]
|
||||
)
|
||||
|
||||
(define_insn "*fmsub<VCVTF:mode>4"
|
||||
[(set (match_operand:VCVTF 0 "register_operand" "=w")
|
||||
(fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
|
||||
(match_operand:VCVTF 2 "register_operand" "w")
|
||||
(match_operand:VCVTF 3 "register_operand" "0")))]
|
||||
"TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
|
||||
"vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
[(set (attr "neon_type")
|
||||
(if_then_else (match_test "<Is_d_reg>")
|
||||
(const_string "neon_fp_vmla_ddd")
|
||||
(const_string "neon_fp_vmla_qqq")))]
|
||||
)
|
||||
|
||||
(define_insn "ior<mode>3"
|
||||
[(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
|
||||
(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
|
||||
|
@ -1525,11 +1525,19 @@ ARM target supports generating NEON instructions.
|
||||
@item arm_neon_hw
|
||||
Test system supports executing NEON instructions.
|
||||
|
||||
@item arm_neonv2_hw
|
||||
Test system supports executing NEON v2 instructions.
|
||||
|
||||
@item arm_neon_ok
|
||||
@anchor{arm_neon_ok}
|
||||
ARM Target supports @code{-mfpu=neon -mfloat-abi=softfp} or compatible
|
||||
options. Some multilibs may be incompatible with these options.
|
||||
|
||||
@item arm_neonv2_ok
|
||||
@anchor{arm_neon_ok}
|
||||
ARM Target supports @code{-mfpu=neon -mfloat-abi=softfp} or compatible
|
||||
options. Some multilibs may be incompatible with these options.
|
||||
|
||||
@item arm_neon_fp16_ok
|
||||
@anchor{arm_neon_fp16_ok}
|
||||
ARM Target supports @code{-mfpu=neon-fp16 -mfloat-abi=softfp} or compatible
|
||||
|
@ -1,3 +1,18 @@
|
||||
2012-09-11 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
|
||||
Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
|
||||
|
||||
* gcc.target/arm/neon-vfma-1.c: New testcase.
|
||||
* gcc.target/arm/neon-vfms-1.c: Likewise.
|
||||
* gcc.target/arm/neon-vmla-1.c: Update test to use int instead
|
||||
of float.
|
||||
* gcc.target/arm/neon-vmls-1.c: Likewise.
|
||||
* lib/target-supports.exp (add_options_for_arm_neonv2): New
|
||||
function.
|
||||
(check_effective_target_arm_neonv2_ok_nocache): Likewise.
|
||||
(check_effective_target_arm_neonv2_ok): Likewise.
|
||||
(check_effective_target_arm_neonv2_hw): Likewise.
|
||||
(check_effective_target_arm_neonv2): Likewise.
|
||||
|
||||
2012-09-11 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR middle-end/54515
|
||||
|
12
gcc/testsuite/gcc.target/arm/neon-vfma-1.c
Normal file
12
gcc/testsuite/gcc.target/arm/neon-vfma-1.c
Normal file
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_neonv2_ok } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||
/* { dg-add-options arm_neonv2 } */
|
||||
/* { dg-final { scan-assembler "vfma\\.f32\[ \]+\[dDqQ]" } } */
|
||||
|
||||
/* Verify that VFMA is used. */
|
||||
void f1(int n, float a, float x[], float y[]) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i)
|
||||
y[i] = a * x[i] + y[i];
|
||||
}
|
12
gcc/testsuite/gcc.target/arm/neon-vfms-1.c
Normal file
12
gcc/testsuite/gcc.target/arm/neon-vfms-1.c
Normal file
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_neonv2_ok } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||
/* { dg-add-options arm_neonv2 } */
|
||||
/* { dg-final { scan-assembler "vfms\\.f32\[ \]+\[dDqQ]" } } */
|
||||
|
||||
/* Verify that VFMS is used. */
|
||||
void f1(int n, float a, float x[], float y[]) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i)
|
||||
y[i] = a * -x[i] + y[i];
|
||||
}
|
@ -1,10 +1,10 @@
|
||||
/* { dg-require-effective-target arm_neon_hw } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||
/* { dg-add-options arm_neon } */
|
||||
/* { dg-final { scan-assembler "vmla\\.f32" } } */
|
||||
/* { dg-final { scan-assembler "vmla\\.i32" } } */
|
||||
|
||||
/* Verify that VMLA is used. */
|
||||
void f1(int n, float a, float x[], float y[]) {
|
||||
void f1(int n, int a, int x[], int y[]) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i)
|
||||
y[i] = a * x[i] + y[i];
|
||||
|
@ -1,10 +1,10 @@
|
||||
/* { dg-require-effective-target arm_neon_hw } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||
/* { dg-add-options arm_neon } */
|
||||
/* { dg-final { scan-assembler "vmls\\.f32" } } */
|
||||
/* { dg-final { scan-assembler "vmls\\.i32" } } */
|
||||
|
||||
/* Verify that VMLS is used. */
|
||||
void f1(int n, float a, float x[], float y[]) {
|
||||
void f1(int n, int a, int x[], int y[]) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i)
|
||||
y[i] = y[i] - a * x[i];
|
||||
|
@ -2099,6 +2099,19 @@ proc add_options_for_arm_neon { flags } {
|
||||
return "$flags $et_arm_neon_flags"
|
||||
}
|
||||
|
||||
# Add the options needed for NEON. We need either -mfloat-abi=softfp
|
||||
# or -mfloat-abi=hard, but if one is already specified by the
|
||||
# multilib, use it. Similarly, if a -mfpu option already enables
|
||||
# NEON, do not add -mfpu=neon.
|
||||
|
||||
proc add_options_for_arm_neonv2 { flags } {
|
||||
if { ! [check_effective_target_arm_neonv2_ok] } {
|
||||
return "$flags"
|
||||
}
|
||||
global et_arm_neonv2_flags
|
||||
return "$flags $et_arm_neonv2_flags"
|
||||
}
|
||||
|
||||
# Return 1 if this is an ARM target supporting -mfpu=neon
|
||||
# -mfloat-abi=softfp or equivalent options. Some multilibs may be
|
||||
# incompatible with these options. Also set et_arm_neon_flags to the
|
||||
@ -2127,6 +2140,38 @@ proc check_effective_target_arm_neon_ok { } {
|
||||
check_effective_target_arm_neon_ok_nocache]
|
||||
}
|
||||
|
||||
# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4
|
||||
# -mfloat-abi=softfp or equivalent options. Some multilibs may be
|
||||
# incompatible with these options. Also set et_arm_neonv2_flags to the
|
||||
# best options to add.
|
||||
|
||||
proc check_effective_target_arm_neonv2_ok_nocache { } {
|
||||
global et_arm_neonv2_flags
|
||||
set et_arm_neonv2_flags ""
|
||||
if { [check_effective_target_arm32] } {
|
||||
foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} {
|
||||
if { [check_no_compiler_messages_nocache arm_neonv2_ok object {
|
||||
#include "arm_neon.h"
|
||||
float32x2_t
|
||||
foo (float32x2_t a, float32x2_t b, float32x2_t c)
|
||||
{
|
||||
return vfma_f32 (a, b, c);
|
||||
}
|
||||
} "$flags"] } {
|
||||
set et_arm_neonv2_flags $flags
|
||||
return 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
proc check_effective_target_arm_neonv2_ok { } {
|
||||
return [check_cached_effective_target arm_neonv2_ok \
|
||||
check_effective_target_arm_neonv2_ok_nocache]
|
||||
}
|
||||
|
||||
# Add the options needed for NEON. We need either -mfloat-abi=softfp
|
||||
# or -mfloat-abi=hard, but if one is already specified by the
|
||||
# multilib, use it.
|
||||
@ -2318,6 +2363,21 @@ proc check_effective_target_arm_neon_hw { } {
|
||||
} [add_options_for_arm_neon ""]]
|
||||
}
|
||||
|
||||
proc check_effective_target_arm_neonv2_hw { } {
|
||||
return [check_runtime arm_neon_hwv2_available {
|
||||
#include "arm_neon.h"
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
float32x2_t a, b, c;
|
||||
asm ("vfma.f32 %P0, %P1, %P2"
|
||||
: "=w" (a)
|
||||
: "w" (b), "w" (c));
|
||||
return 0;
|
||||
}
|
||||
} [add_options_for_arm_neonv2 ""]]
|
||||
}
|
||||
|
||||
# Return 1 if this is a ARM target with NEON enabled.
|
||||
|
||||
proc check_effective_target_arm_neon { } {
|
||||
@ -2334,6 +2394,24 @@ proc check_effective_target_arm_neon { } {
|
||||
}
|
||||
}
|
||||
|
||||
proc check_effective_target_arm_neonv2 { } {
|
||||
if { [check_effective_target_arm32] } {
|
||||
return [check_no_compiler_messages arm_neon object {
|
||||
#ifndef __ARM_NEON__
|
||||
#error not NEON
|
||||
#else
|
||||
#ifndef __ARM_FEATURE_FMA
|
||||
#error not NEONv2
|
||||
#else
|
||||
int dummy;
|
||||
#endif
|
||||
#endif
|
||||
}]
|
||||
} else {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
# Return 1 if this a Loongson-2E or -2F target using an ABI that supports
|
||||
# the Loongson vector modes.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user