From 8b2ab9cbe46a9dc72099933681bc803766405295 Mon Sep 17 00:00:00 2001 From: Ramana Radhakrishnan Date: Tue, 11 Sep 2012 12:53:00 +0000 Subject: [PATCH] Add support for vectorized fma. 2012-09-11 Ramana Radhakrishnan Matthew Gretton-Dann * config/arm/neon.md (fma4): New pattern. (*fmsub4): Likewise. * doc/sourcebuild.texi (arm_neon_v2_ok, arm_neon_v2_hw): Document it. 2012-09-11 Ramana Radhakrishnan Matthew Gretton-Dann * gcc.target/arm/neon-vfma-1.c: New testcase. * gcc.target/arm/neon-vfms-1.c: Likewise. * gcc.target/arm/neon-vmla-1.c: Update test to use int instead of float. * gcc.target/arm/neon-vmls-1.c: Likewise. * lib/target-supports.exp (add_options_for_arm_neonv2): New function. (check_effective_target_arm_neonv2_ok_nocache): Likewise. (check_effective_target_arm_neonv2_ok): Likewise. (check_effective_target_arm_neonv2_hw): Likewise. (check_effective_target_arm_neonv2): Likewise. Co-Authored-By: Matthew Gretton-Dann From-SVN: r191180 --- gcc/ChangeLog | 7 ++ gcc/config/arm/neon.md | 27 ++++++++ gcc/doc/sourcebuild.texi | 8 +++ gcc/testsuite/ChangeLog | 15 +++++ gcc/testsuite/gcc.target/arm/neon-vfma-1.c | 12 ++++ gcc/testsuite/gcc.target/arm/neon-vfms-1.c | 12 ++++ gcc/testsuite/gcc.target/arm/neon-vmla-1.c | 4 +- gcc/testsuite/gcc.target/arm/neon-vmls-1.c | 4 +- gcc/testsuite/lib/target-supports.exp | 78 ++++++++++++++++++++++ 9 files changed, 163 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/neon-vfma-1.c create mode 100644 gcc/testsuite/gcc.target/arm/neon-vfms-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b1ac091e2bc6..e65785a9e312 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2012-09-11 Ramana Radhakrishnan + Matthew Gretton-Dann + + * config/arm/neon.md (fma4): New pattern. + (*fmsub4): Likewise. + * doc/sourcebuild.texi (arm_neon_v2_ok, arm_neon_v2_hw): Document it. + 2012-09-11 Aldy Hernandez PR middle-end/54149 diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index faf909078b44..fe0618c22585 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -707,6 +707,33 @@ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))] ) +;; Fused multiply-accumulate +(define_insn "fma4" + [(set (match_operand:VCVTF 0 "register_operand" "=w") + (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") + (match_operand:VCVTF 2 "register_operand" "w") + (match_operand:VCVTF 3 "register_operand" "0")))] + "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" + "vfma%?.\\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (match_test "") + (const_string "neon_fp_vmla_ddd") + (const_string "neon_fp_vmla_qqq")))] +) + +(define_insn "*fmsub4" + [(set (match_operand:VCVTF 0 "register_operand" "=w") + (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) + (match_operand:VCVTF 2 "register_operand" "w") + (match_operand:VCVTF 3 "register_operand" "0")))] + "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" + "vfms%?.\\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (match_test "") + (const_string "neon_fp_vmla_ddd") + (const_string "neon_fp_vmla_qqq")))] +) + (define_insn "ior3" [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 7e9dbe31c2d9..3fe52ad23564 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1525,11 +1525,19 @@ ARM target supports generating NEON instructions. @item arm_neon_hw Test system supports executing NEON instructions. +@item arm_neonv2_hw +Test system supports executing NEON v2 instructions. + @item arm_neon_ok @anchor{arm_neon_ok} ARM Target supports @code{-mfpu=neon -mfloat-abi=softfp} or compatible options. Some multilibs may be incompatible with these options. +@item arm_neonv2_ok +@anchor{arm_neon_ok} +ARM Target supports @code{-mfpu=neon -mfloat-abi=softfp} or compatible +options. Some multilibs may be incompatible with these options. + @item arm_neon_fp16_ok @anchor{arm_neon_fp16_ok} ARM Target supports @code{-mfpu=neon-fp16 -mfloat-abi=softfp} or compatible diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a68b6230fe68..47def6320280 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,18 @@ +2012-09-11 Ramana Radhakrishnan + Matthew Gretton-Dann + + * gcc.target/arm/neon-vfma-1.c: New testcase. + * gcc.target/arm/neon-vfms-1.c: Likewise. + * gcc.target/arm/neon-vmla-1.c: Update test to use int instead + of float. + * gcc.target/arm/neon-vmls-1.c: Likewise. + * lib/target-supports.exp (add_options_for_arm_neonv2): New + function. + (check_effective_target_arm_neonv2_ok_nocache): Likewise. + (check_effective_target_arm_neonv2_ok): Likewise. + (check_effective_target_arm_neonv2_hw): Likewise. + (check_effective_target_arm_neonv2): Likewise. + 2012-09-11 Richard Guenther PR middle-end/54515 diff --git a/gcc/testsuite/gcc.target/arm/neon-vfma-1.c b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c new file mode 100644 index 000000000000..a003a8274f61 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neonv2_ok } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ +/* { dg-add-options arm_neonv2 } */ +/* { dg-final { scan-assembler "vfma\\.f32\[ \]+\[dDqQ]" } } */ + +/* Verify that VFMA is used. */ +void f1(int n, float a, float x[], float y[]) { + int i; + for (i = 0; i < n; ++i) + y[i] = a * x[i] + y[i]; +} diff --git a/gcc/testsuite/gcc.target/arm/neon-vfms-1.c b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c new file mode 100644 index 000000000000..8cefd8a851ce --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neonv2_ok } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ +/* { dg-add-options arm_neonv2 } */ +/* { dg-final { scan-assembler "vfms\\.f32\[ \]+\[dDqQ]" } } */ + +/* Verify that VFMS is used. */ +void f1(int n, float a, float x[], float y[]) { + int i; + for (i = 0; i < n; ++i) + y[i] = a * -x[i] + y[i]; +} diff --git a/gcc/testsuite/gcc.target/arm/neon-vmla-1.c b/gcc/testsuite/gcc.target/arm/neon-vmla-1.c index 9d239ed47d09..c60c014e0c2f 100644 --- a/gcc/testsuite/gcc.target/arm/neon-vmla-1.c +++ b/gcc/testsuite/gcc.target/arm/neon-vmla-1.c @@ -1,10 +1,10 @@ /* { dg-require-effective-target arm_neon_hw } */ /* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ /* { dg-add-options arm_neon } */ -/* { dg-final { scan-assembler "vmla\\.f32" } } */ +/* { dg-final { scan-assembler "vmla\\.i32" } } */ /* Verify that VMLA is used. */ -void f1(int n, float a, float x[], float y[]) { +void f1(int n, int a, int x[], int y[]) { int i; for (i = 0; i < n; ++i) y[i] = a * x[i] + y[i]; diff --git a/gcc/testsuite/gcc.target/arm/neon-vmls-1.c b/gcc/testsuite/gcc.target/arm/neon-vmls-1.c index 2beaebe17cf4..89ee82b0fe81 100644 --- a/gcc/testsuite/gcc.target/arm/neon-vmls-1.c +++ b/gcc/testsuite/gcc.target/arm/neon-vmls-1.c @@ -1,10 +1,10 @@ /* { dg-require-effective-target arm_neon_hw } */ /* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ /* { dg-add-options arm_neon } */ -/* { dg-final { scan-assembler "vmls\\.f32" } } */ +/* { dg-final { scan-assembler "vmls\\.i32" } } */ /* Verify that VMLS is used. */ -void f1(int n, float a, float x[], float y[]) { +void f1(int n, int a, int x[], int y[]) { int i; for (i = 0; i < n; ++i) y[i] = y[i] - a * x[i]; diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index f597316ac4db..8f793b7e5099 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2099,6 +2099,19 @@ proc add_options_for_arm_neon { flags } { return "$flags $et_arm_neon_flags" } +# Add the options needed for NEON. We need either -mfloat-abi=softfp +# or -mfloat-abi=hard, but if one is already specified by the +# multilib, use it. Similarly, if a -mfpu option already enables +# NEON, do not add -mfpu=neon. + +proc add_options_for_arm_neonv2 { flags } { + if { ! [check_effective_target_arm_neonv2_ok] } { + return "$flags" + } + global et_arm_neonv2_flags + return "$flags $et_arm_neonv2_flags" +} + # Return 1 if this is an ARM target supporting -mfpu=neon # -mfloat-abi=softfp or equivalent options. Some multilibs may be # incompatible with these options. Also set et_arm_neon_flags to the @@ -2127,6 +2140,38 @@ proc check_effective_target_arm_neon_ok { } { check_effective_target_arm_neon_ok_nocache] } +# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4 +# -mfloat-abi=softfp or equivalent options. Some multilibs may be +# incompatible with these options. Also set et_arm_neonv2_flags to the +# best options to add. + +proc check_effective_target_arm_neonv2_ok_nocache { } { + global et_arm_neonv2_flags + set et_arm_neonv2_flags "" + if { [check_effective_target_arm32] } { + foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} { + if { [check_no_compiler_messages_nocache arm_neonv2_ok object { + #include "arm_neon.h" + float32x2_t + foo (float32x2_t a, float32x2_t b, float32x2_t c) + { + return vfma_f32 (a, b, c); + } + } "$flags"] } { + set et_arm_neonv2_flags $flags + return 1 + } + } + } + + return 0 +} + +proc check_effective_target_arm_neonv2_ok { } { + return [check_cached_effective_target arm_neonv2_ok \ + check_effective_target_arm_neonv2_ok_nocache] +} + # Add the options needed for NEON. We need either -mfloat-abi=softfp # or -mfloat-abi=hard, but if one is already specified by the # multilib, use it. @@ -2318,6 +2363,21 @@ proc check_effective_target_arm_neon_hw { } { } [add_options_for_arm_neon ""]] } +proc check_effective_target_arm_neonv2_hw { } { + return [check_runtime arm_neon_hwv2_available { + #include "arm_neon.h" + int + main (void) + { + float32x2_t a, b, c; + asm ("vfma.f32 %P0, %P1, %P2" + : "=w" (a) + : "w" (b), "w" (c)); + return 0; + } + } [add_options_for_arm_neonv2 ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { @@ -2334,6 +2394,24 @@ proc check_effective_target_arm_neon { } { } } +proc check_effective_target_arm_neonv2 { } { + if { [check_effective_target_arm32] } { + return [check_no_compiler_messages arm_neon object { + #ifndef __ARM_NEON__ + #error not NEON + #else + #ifndef __ARM_FEATURE_FMA + #error not NEONv2 + #else + int dummy; + #endif + #endif + }] + } else { + return 0 + } +} + # Return 1 if this a Loongson-2E or -2F target using an ABI that supports # the Loongson vector modes.