diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 16a21da34461..6f6da0790385 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,52 @@ +2010-11-16 Richard Henderson + + * config.gcc [powerpc*, rs6000*] (extra_options): Add fused-madd.opt. + * config/rs6000/rs6000.opt (mfused-madd): Remove. + * config/rs6000/altivec.md (altivec_vmaddfp): Remove. + (*altivec_vmaddfp_1): Remove. + (*altivec_fmav4sf4): Rename from altivec_vmaddfp_2; use FMA. + (altivec_mulv4sf3): Expand to FMA directly. + (*altivec_vnmsubfp): Rename from altivec_vnmsubfp. + (*altivec_vnmsubfp_1, *altivec_vnmsubfp_2): Remove. + * config/rs6000/paired.md (paired_madds0): Use FMA. + (paired_madds1): Likewise. + (*paired_madd): Rename from paired_madd; use FMA. + (*paired_msub, *paired_nmadd, *paired_nmsub): Similarly. + * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Do not + consider TARGET_FUSED_MADD wrt rs6000_recip_control. + (bdesc_3arg): Update CODE_FOR_* for pattern renames. + (rs6000_emit_madd): Use fma_optab. + (rs6000_emit_msub): Use fms_optab. + (rs6000_emit_nmsub): Expand the FMA pattern directly. + * config/rs6000/rs6000.md (FMA_F): New mode iterator. + (*fmasf4_fpr): Rename from fmasf4_fpr. + (*nfmasf4_fpr): Rename from *fnmasf4_fpr. + (*nfmssf4_fpr): Rename from *fnmssf4_fpr. + (*fmaddsf4_powerpc, *fmaddsf4_power, *fmsubsf4_powerpc): Remove. + (*fmsubsf4_power, *fnmaddsf4_powerpc_1, *fnmaddsf4_powerpc_2): Remove. + (*fnmaddsf4_power_1, *fnmaddsf4_power_2, *fnmsubsf4_powerpc_1): Remove. + (*fnmsubsf4_powerpc_2, *fnmsubsf4_power_1, *fnmsubsf4_power_2): Remove. + (*fmadf4_fpr): Rename from fmadf4_fpr. + (*nfmadf4_fpr): Rename from *fnmadf4_fpr. + (*nfmsdf4_fpr): Rename from *fnmsdf4_fpr. + (*fmadddf4_fpr, *fmsubdf4_fpr, *fnmadddf4_fpr_1): Remove. + (*fnmadddf4_fpr_2, *fnmsubdf4_fpr_1, *fnmsubdf4_fpr_2): Remove. + (fmasf4, fmadf4): Macroize into... + (fma4): ... here. + (fms4, fnma4, fnms4): New. + (nfma4, nfms4): New. 
+ * config/rs6000/vector.md (mul3): Do not depend on + TARGET_FUSED_MADD. + * config/rs6000/vsx.md (vsx_fmadd4): Remove. + (*vsx_fmadd4_1): Remove. + (vsx_fmsub4, *vsx_fmsub4_1): Remove. + (vsx_fnmadd4_1, vsx_fnmadd4_2): Remove. + (vsx_fnmsub4_1, vsx_fnmsub4_2): Remove. + (*vsx_fma4): Rename from vsx_fmadd4_2. + (*vsx_fms4): Rename from vsx_fmsub4_2. + (*vsx_nfma4): Rename from vsx_fnmadd4. + (*vsx_nfms4): Rename from vsx_fnmsub4. + 2010-11-16 Richard Henderson * config/pa/pa.md (fmadf4): Rename from unnamed; use FMA. diff --git a/gcc/config.gcc b/gcc/config.gcc index 0bafc3a0364b..0202a2293747 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -372,11 +372,11 @@ powerpc*-*-*) cpu_is_64bit=yes ;; esac - extra_options="${extra_options} g.opt" + extra_options="${extra_options} g.opt fused-madd.opt" ;; rs6000*-*-*) need_64bit_hwint=yes - extra_options="${extra_options} g.opt" + extra_options="${extra_options} g.opt fused-madd.opt" ;; score*-*-*) cpu_type=score diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 9f1b3fe7f8a8..a173ede93e6b 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -512,35 +512,9 @@ "vsel %0,%3,%2,%1" [(set_attr "type" "vecperm")]) -;; Fused multiply add. By default expand the FMA into (plus (mult)) to help -;; loop unrolling. Don't do negate multiply ops, because of complications with -;; honoring signed zero and fused-madd. +;; Fused multiply add. 
-(define_expand "altivec_vmaddfp" - [(set (match_operand:V4SF 0 "register_operand" "") - (plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "register_operand" "")) - (match_operand:V4SF 3 "register_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" -{ - if (!TARGET_FUSED_MADD) - { - emit_insn (gen_altivec_vmaddfp_2 (operands[0], operands[1], operands[2], - operands[3])); - DONE; - } -}) - -(define_insn "*altivec_vmaddfp_1" - [(set (match_operand:V4SF 0 "register_operand" "=v") - (plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v") - (match_operand:V4SF 2 "register_operand" "v")) - (match_operand:V4SF 3 "register_operand" "v")))] - "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD" - "vmaddfp %0,%1,%2,%3" - [(set_attr "type" "vecfloat")]) - -(define_insn "altivec_vmaddfp_2" +(define_insn "*altivec_fmav4sf4" [(set (match_operand:V4SF 0 "register_operand" "=v") (fma:V4SF (match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v") @@ -552,11 +526,11 @@ ;; We do multiply as a fused multiply-add with an add of a -0.0 vector. (define_expand "altivec_mulv4sf3" - [(use (match_operand:V4SF 0 "register_operand" "")) - (use (match_operand:V4SF 1 "register_operand" "")) - (use (match_operand:V4SF 2 "register_operand" ""))] + [(set (match_operand:V4SF 0 "register_operand" "") + (fma:V4SF (match_operand:V4SF 1 "register_operand" "") + (match_operand:V4SF 2 "register_operand" "") + (match_dup 3)))] "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" - " { rtx neg0; @@ -565,11 +539,8 @@ emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx)); emit_insn (gen_vashlv4si3 (neg0, neg0, neg0)); - /* Use the multiply-add. 
*/ - emit_insn (gen_altivec_vmaddfp (operands[0], operands[1], operands[2], - gen_lowpart (V4SFmode, neg0))); - DONE; -}") + operands[3] = gen_lowpart (V4SFmode, neg0); +}) ;; 32-bit integer multiplication ;; A_high = Operand_0 & 0xFFFF0000 >> 16 @@ -653,7 +624,7 @@ }") ;; Fused multiply subtract -(define_insn "altivec_vnmsubfp" +(define_insn "*altivec_vnmsubfp" [(set (match_operand:V4SF 0 "register_operand" "=v") (neg:V4SF (fma:V4SF (match_operand:V4SF 1 "register_operand" "v") @@ -664,31 +635,6 @@ "vnmsubfp %0,%1,%2,%3" [(set_attr "type" "vecfloat")]) -(define_insn "*altivec_vnmsubfp_1" - [(set (match_operand:V4SF 0 "register_operand" "=v") - (neg:V4SF - (minus:V4SF - (mult:V4SF - (match_operand:V4SF 1 "register_operand" "v") - (match_operand:V4SF 2 "register_operand" "v")) - (match_operand:V4SF 3 "register_operand" "v"))))] - "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD - && HONOR_SIGNED_ZEROS (SFmode)" - "vnmsubfp %0,%1,%2,%3" - [(set_attr "type" "vecfloat")]) - -(define_insn "*altivec_vnmsubfp_2" - [(set (match_operand:V4SF 0 "register_operand" "=v") - (minus:V4SF - (match_operand:V4SF 3 "register_operand" "v") - (mult:V4SF - (match_operand:V4SF 1 "register_operand" "v") - (match_operand:V4SF 2 "register_operand" "v"))))] - "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD - && !HONOR_SIGNED_ZEROS (SFmode)" - "vnmsubfp %0,%1,%2,%3" - [(set_attr "type" "vecfloat")]) - (define_insn "altivec_vmsumum" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v") diff --git a/gcc/config/rs6000/paired.md b/gcc/config/rs6000/paired.md index 698107826179..f077d5b6f795 100644 --- a/gcc/config/rs6000/paired.md +++ b/gcc/config/rs6000/paired.md @@ -96,77 +96,85 @@ (define_insn "paired_madds0" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (vec_concat:V2SF - (plus:SF (mult:SF (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") - (parallel [(const_int 0)])) - (vec_select:SF 
(match_operand:V2SF 2 "gpc_reg_operand" "f") - (parallel [(const_int 0)]))) - (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f") - (parallel [(const_int 0)]))) - (plus:SF (mult:SF (vec_select:SF (match_dup 1) - (parallel [(const_int 1)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 0)]))) - (vec_select:SF (match_dup 3) - (parallel [(const_int 1)])))))] - "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD" + (vec_concat:V2SF + (fma:SF + (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f") + (parallel [(const_int 0)]))) + (fma:SF + (vec_select:SF (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:SF (match_dup 2) + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 3) + (parallel [(const_int 1)])))))] + "TARGET_PAIRED_FLOAT" "ps_madds0 %0,%1,%2,%3" [(set_attr "type" "fp")]) (define_insn "paired_madds1" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (vec_concat:V2SF - (plus:SF (mult:SF (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") - (parallel [(const_int 0)])) - (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f") - (parallel [(const_int 1)]))) - (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f") - (parallel [(const_int 0)]))) - (plus:SF (mult:SF (vec_select:SF (match_dup 1) - (parallel [(const_int 1)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)]))) - (vec_select:SF (match_dup 3) - (parallel [(const_int 1)])))))] - "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD" + (vec_concat:V2SF + (fma:SF + (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f") + (parallel [(const_int 1)])) + (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f") + (parallel [(const_int 0)]))) + (fma:SF + (vec_select:SF (match_dup 1) + 
(parallel [(const_int 1)])) + (vec_select:SF (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:SF (match_dup 3) + (parallel [(const_int 1)])))))] + "TARGET_PAIRED_FLOAT" "ps_madds1 %0,%1,%2,%3" [(set_attr "type" "fp")]) -(define_insn "paired_madd" +(define_insn "*paired_madd" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (plus:V2SF (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f") - (match_operand:V2SF 2 "gpc_reg_operand" "f")) - (match_operand:V2SF 3 "gpc_reg_operand" "f")))] - "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD" + (fma:V2SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (match_operand:V2SF 3 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" "ps_madd %0,%1,%2,%3" [(set_attr "type" "fp")]) -(define_insn "paired_msub" +(define_insn "*paired_msub" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (minus:V2SF (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f") - (match_operand:V2SF 2 "gpc_reg_operand" "f")) - (match_operand:V2SF 3 "gpc_reg_operand" "f")))] - "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD" + (fma:V2SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (neg:V2SF (match_operand:V2SF 3 "gpc_reg_operand" "f"))))] + "TARGET_PAIRED_FLOAT" "ps_msub %0,%1,%2,%3" [(set_attr "type" "fp")]) -(define_insn "paired_nmadd" +(define_insn "*paired_nmadd" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (neg:V2SF (plus:V2SF (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f") - (match_operand:V2SF 2 "gpc_reg_operand" "f")) - (match_operand:V2SF 3 "gpc_reg_operand" "f"))))] - "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD - && HONOR_SIGNED_ZEROS (SFmode)" + (neg:V2SF + (fma:V2SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (match_operand:V2SF 3 "gpc_reg_operand" "f"))))] + "TARGET_PAIRED_FLOAT" "ps_nmadd %0,%1,%2,%3" [(set_attr "type" "fp")]) -(define_insn "paired_nmsub" +(define_insn 
"*paired_nmsub" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (neg:V2SF (minus:V2SF (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f") - (match_operand:V2SF 2 "gpc_reg_operand" "f")) - (match_operand:V2SF 3 "gpc_reg_operand" "f"))))] - "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD - && HONOR_SIGNED_ZEROS (DFmode)" + (neg:V2SF + (fma:V2SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (neg:V2SF (match_operand:V2SF 3 "gpc_reg_operand" "f")))))] + "TARGET_PAIRED_FLOAT" "ps_nmsub %0,%1,%2,%3" [(set_attr "type" "dmul")]) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index d71340b61eb0..fe3d0db904ad 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -2288,16 +2288,13 @@ rs6000_init_hard_regno_mode_ok (void) if (rs6000_recip_control) { - if (!TARGET_FUSED_MADD) - warning (0, "-mrecip requires -mfused-madd"); if (!flag_finite_math_only) warning (0, "-mrecip requires -ffinite-math or -ffast-math"); if (flag_trapping_math) warning (0, "-mrecip requires -fno-trapping-math or -ffast-math"); if (!flag_reciprocal_math) warning (0, "-mrecip requires -freciprocal-math or -ffast-math"); - if (TARGET_FUSED_MADD && flag_finite_math_only && !flag_trapping_math - && flag_reciprocal_math) + if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math) { if (RS6000_RECIP_HAVE_RE_P (SFmode) && (rs6000_recip_control & RECIP_SF_DIV) != 0) @@ -9688,7 +9685,7 @@ def_builtin (int mask, const char *name, tree type, int code) static const struct builtin_description bdesc_3arg[] = { - { MASK_ALTIVEC, CODE_FOR_altivec_vmaddfp, "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP }, + { MASK_ALTIVEC, CODE_FOR_fmav4sf4, "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP }, { MASK_ALTIVEC, CODE_FOR_altivec_vmhaddshs, "__builtin_altivec_vmhaddshs", ALTIVEC_BUILTIN_VMHADDSHS }, { MASK_ALTIVEC, CODE_FOR_altivec_vmhraddshs, "__builtin_altivec_vmhraddshs", ALTIVEC_BUILTIN_VMHRADDSHS }, 
{ MASK_ALTIVEC, CODE_FOR_altivec_vmladduhm, "__builtin_altivec_vmladduhm", ALTIVEC_BUILTIN_VMLADDUHM}, @@ -9698,7 +9695,7 @@ static const struct builtin_description bdesc_3arg[] = { MASK_ALTIVEC, CODE_FOR_altivec_vmsumshm, "__builtin_altivec_vmsumshm", ALTIVEC_BUILTIN_VMSUMSHM }, { MASK_ALTIVEC, CODE_FOR_altivec_vmsumuhs, "__builtin_altivec_vmsumuhs", ALTIVEC_BUILTIN_VMSUMUHS }, { MASK_ALTIVEC, CODE_FOR_altivec_vmsumshs, "__builtin_altivec_vmsumshs", ALTIVEC_BUILTIN_VMSUMSHS }, - { MASK_ALTIVEC, CODE_FOR_altivec_vnmsubfp, "__builtin_altivec_vnmsubfp", ALTIVEC_BUILTIN_VNMSUBFP }, + { MASK_ALTIVEC, CODE_FOR_nfmsv4sf4, "__builtin_altivec_vnmsubfp", ALTIVEC_BUILTIN_VNMSUBFP }, { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v2df, "__builtin_altivec_vperm_2df", ALTIVEC_BUILTIN_VPERM_2DF }, { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v2di, "__builtin_altivec_vperm_2di", ALTIVEC_BUILTIN_VPERM_2DI }, { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v4sf, "__builtin_altivec_vperm_4sf", ALTIVEC_BUILTIN_VPERM_4SF }, @@ -9740,15 +9737,15 @@ static const struct builtin_description bdesc_3arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_perm", ALTIVEC_BUILTIN_VEC_PERM }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sel", ALTIVEC_BUILTIN_VEC_SEL }, - { MASK_VSX, CODE_FOR_vsx_fmaddv2df4, "__builtin_vsx_xvmadddp", VSX_BUILTIN_XVMADDDP }, - { MASK_VSX, CODE_FOR_vsx_fmsubv2df4, "__builtin_vsx_xvmsubdp", VSX_BUILTIN_XVMSUBDP }, - { MASK_VSX, CODE_FOR_vsx_fnmaddv2df4, "__builtin_vsx_xvnmadddp", VSX_BUILTIN_XVNMADDDP }, - { MASK_VSX, CODE_FOR_vsx_fnmsubv2df4, "__builtin_vsx_xvnmsubdp", VSX_BUILTIN_XVNMSUBDP }, + { MASK_VSX, CODE_FOR_fmav2df4, "__builtin_vsx_xvmadddp", VSX_BUILTIN_XVMADDDP }, + { MASK_VSX, CODE_FOR_fmsv2df4, "__builtin_vsx_xvmsubdp", VSX_BUILTIN_XVMSUBDP }, + { MASK_VSX, CODE_FOR_nfmav2df4, "__builtin_vsx_xvnmadddp", VSX_BUILTIN_XVNMADDDP }, + { MASK_VSX, CODE_FOR_nfmsv2df4, "__builtin_vsx_xvnmsubdp", VSX_BUILTIN_XVNMSUBDP }, - { MASK_VSX, CODE_FOR_vsx_fmaddv4sf4, 
"__builtin_vsx_xvmaddsp", VSX_BUILTIN_XVMADDSP }, - { MASK_VSX, CODE_FOR_vsx_fmsubv4sf4, "__builtin_vsx_xvmsubsp", VSX_BUILTIN_XVMSUBSP }, - { MASK_VSX, CODE_FOR_vsx_fnmaddv4sf4, "__builtin_vsx_xvnmaddsp", VSX_BUILTIN_XVNMADDSP }, - { MASK_VSX, CODE_FOR_vsx_fnmsubv4sf4, "__builtin_vsx_xvnmsubsp", VSX_BUILTIN_XVNMSUBSP }, + { MASK_VSX, CODE_FOR_fmav4sf4, "__builtin_vsx_xvmaddsp", VSX_BUILTIN_XVMADDSP }, + { MASK_VSX, CODE_FOR_fmsv4sf4, "__builtin_vsx_xvmsubsp", VSX_BUILTIN_XVMSUBSP }, + { MASK_VSX, CODE_FOR_nfmav4sf4, "__builtin_vsx_xvnmaddsp", VSX_BUILTIN_XVNMADDSP }, + { MASK_VSX, CODE_FOR_nfmsv4sf4, "__builtin_vsx_xvnmsubsp", VSX_BUILTIN_XVNMSUBSP }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_msub", VSX_BUILTIN_VEC_MSUB }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_nmadd", VSX_BUILTIN_VEC_NMADD }, @@ -9793,12 +9790,12 @@ static const struct builtin_description bdesc_3arg[] = { MASK_VSX, CODE_FOR_vsx_xxsldwi_v16qi, "__builtin_vsx_xxsldwi_16qi", VSX_BUILTIN_XXSLDWI_16QI }, { MASK_VSX, CODE_FOR_nothing, "__builtin_vsx_xxsldwi", VSX_BUILTIN_VEC_XXSLDWI }, - { 0, CODE_FOR_paired_msub, "__builtin_paired_msub", PAIRED_BUILTIN_MSUB }, - { 0, CODE_FOR_paired_madd, "__builtin_paired_madd", PAIRED_BUILTIN_MADD }, + { 0, CODE_FOR_fmsv2sf4, "__builtin_paired_msub", PAIRED_BUILTIN_MSUB }, + { 0, CODE_FOR_fmav2sf4, "__builtin_paired_madd", PAIRED_BUILTIN_MADD }, { 0, CODE_FOR_paired_madds0, "__builtin_paired_madds0", PAIRED_BUILTIN_MADDS0 }, { 0, CODE_FOR_paired_madds1, "__builtin_paired_madds1", PAIRED_BUILTIN_MADDS1 }, - { 0, CODE_FOR_paired_nmsub, "__builtin_paired_nmsub", PAIRED_BUILTIN_NMSUB }, - { 0, CODE_FOR_paired_nmadd, "__builtin_paired_nmadd", PAIRED_BUILTIN_NMADD }, + { 0, CODE_FOR_nfmsv2sf4, "__builtin_paired_nmsub", PAIRED_BUILTIN_NMSUB }, + { 0, CODE_FOR_nfmav2sf4, "__builtin_paired_nmadd", PAIRED_BUILTIN_NMADD }, { 0, CODE_FOR_paired_sum0, "__builtin_paired_sum0", PAIRED_BUILTIN_SUM0 }, { 0, CODE_FOR_paired_sum1, "__builtin_paired_sum1", 
PAIRED_BUILTIN_SUM1 }, { 0, CODE_FOR_selv2sf4, "__builtin_paired_selv2sf4", PAIRED_BUILTIN_SELV2SF4 }, @@ -26394,112 +26391,65 @@ rs6000_load_constant_and_splat (enum machine_mode mode, REAL_VALUE_TYPE dconst) return reg; } -/* Generate a FMADD instruction: - dst = (m1 * m2) + a - - generating different RTL based on the fused multiply/add switch. */ +/* Generate an FMA instruction. */ static void -rs6000_emit_madd (rtx dst, rtx m1, rtx m2, rtx a) +rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a) { - enum machine_mode mode = GET_MODE (dst); + enum machine_mode mode = GET_MODE (target); + rtx dst; - if (!TARGET_FUSED_MADD) - { - /* For the simple ops, use the generator function, rather than assuming - that the RTL is standard. */ - enum insn_code mcode = optab_handler (smul_optab, mode); - enum insn_code acode = optab_handler (add_optab, mode); - gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (mcode); - gen_2arg_fn_t gen_add = (gen_2arg_fn_t) GEN_FCN (acode); - rtx mreg = gen_reg_rtx (mode); + dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0); + gcc_assert (dst != NULL); - gcc_assert (mcode != CODE_FOR_nothing && acode != CODE_FOR_nothing); - emit_insn (gen_mul (mreg, m1, m2)); - emit_insn (gen_add (dst, mreg, a)); - } - - else - emit_insn (gen_rtx_SET (VOIDmode, dst, - gen_rtx_PLUS (mode, - gen_rtx_MULT (mode, m1, m2), - a))); + if (dst != target) + emit_move_insn (target, dst); } -/* Generate a FMSUB instruction: - dst = (m1 * m2) - a - - generating different RTL based on the fused multiply/add switch. */ +/* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */ static void -rs6000_emit_msub (rtx dst, rtx m1, rtx m2, rtx a) +rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a) { - enum machine_mode mode = GET_MODE (dst); - - if (!TARGET_FUSED_MADD - || (mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (V4SFmode))) - { - /* For the simple ops, use the generator function, rather than assuming - that the RTL is standard. 
*/ - enum insn_code mcode = optab_handler (smul_optab, mode); - enum insn_code scode = optab_handler (add_optab, mode); - gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (mcode); - gen_2arg_fn_t gen_sub = (gen_2arg_fn_t) GEN_FCN (scode); - rtx mreg = gen_reg_rtx (mode); - - gcc_assert (mcode != CODE_FOR_nothing && scode != CODE_FOR_nothing); - emit_insn (gen_mul (mreg, m1, m2)); - emit_insn (gen_sub (dst, mreg, a)); - } + enum machine_mode mode = GET_MODE (target); + rtx dst; + /* Altivec does not support fms directly; + generate in terms of fma in that case. */ + if (optab_handler (fms_optab, mode) != CODE_FOR_nothing) + dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0); else - emit_insn (gen_rtx_SET (VOIDmode, dst, - gen_rtx_MINUS (mode, - gen_rtx_MULT (mode, m1, m2), - a))); + { + a = expand_unop (mode, neg_optab, a, NULL_RTX, 0); + dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0); + } + gcc_assert (dst != NULL); + + if (dst != target) + emit_move_insn (target, dst); } - -/* Generate a FNMSUB instruction: - dst = - ((m1 * m2) - a) - - Which is equivalent to (except in the prescence of -0.0): - dst = a - (m1 * m2) - - generating different RTL based on the fast-math and fused multiply/add - switches. */ + +/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */ static void rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a) { enum machine_mode mode = GET_MODE (dst); + rtx r; - if (!TARGET_FUSED_MADD) - { - /* For the simple ops, use the generator function, rather than assuming - that the RTL is standard. 
*/ - enum insn_code mcode = optab_handler (smul_optab, mode); - enum insn_code scode = optab_handler (sub_optab, mode); - gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (mcode); - gen_2arg_fn_t gen_sub = (gen_2arg_fn_t) GEN_FCN (scode); - rtx mreg = gen_reg_rtx (mode); + /* This is a tad more complicated, since the fnma_optab is for + a different expression: fma(-m1, m2, a), which is the same + thing except in the case of signed zeros. - gcc_assert (mcode != CODE_FOR_nothing && scode != CODE_FOR_nothing); - emit_insn (gen_mul (mreg, m1, m2)); - emit_insn (gen_sub (dst, a, mreg)); - } + Fortunately we know that if FMA is supported that FNMSUB is + also supported in the ISA. Just expand it directly. */ - else - { - rtx m = gen_rtx_MULT (mode, m1, m2); + gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing); - if (!HONOR_SIGNED_ZEROS (mode)) - emit_insn (gen_rtx_SET (VOIDmode, dst, gen_rtx_MINUS (mode, a, m))); - - else - emit_insn (gen_rtx_SET (VOIDmode, dst, - gen_rtx_NEG (mode, - gen_rtx_MINUS (mode, m, a)))); - } + r = gen_rtx_NEG (mode, a); + r = gen_rtx_FMA (mode, m1, m2, r); + r = gen_rtx_NEG (mode, r); + emit_insn (gen_rtx_SET (VOIDmode, dst, r)); } /* Newton-Raphson approximation of floating point divide with just 2 passes diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 200a3b1e9eda..2d73bd83aed7 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -226,6 +226,16 @@ (DD "TARGET_DFP") (TD "TARGET_DFP")]) +; Any fma capable floating-point mode. +(define_mode_iterator FMA_F [ + (SF "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT") + (DF "(TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) + || VECTOR_UNIT_VSX_P (DFmode)") + (V2SF "TARGET_PAIRED_FLOAT") + (V4SF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)") + (V2DF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V2DFmode)") + ]) + ; These modes do not fit in integer registers in 32-bit mode. 
; but on e500v2, the gpr are 64 bit registers (define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD]) @@ -5845,28 +5855,17 @@ [(set_attr "type" "fp")]) ; builtin fmaf support -; If the user explicitly uses the fma builtin, don't convert this to -; (plus (mult op1 op2) op3) -(define_expand "fmasf4" - [(set (match_operand:SF 0 "gpc_reg_operand" "") - (fma:SF (match_operand:SF 1 "gpc_reg_operand" "") - (match_operand:SF 2 "gpc_reg_operand" "") - (match_operand:SF 3 "gpc_reg_operand" "")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT" - "") - -(define_insn "fmasf4_fpr" +(define_insn "*fmasf4_fpr" [(set (match_operand:SF 0 "gpc_reg_operand" "=f") (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f") (match_operand:SF 2 "gpc_reg_operand" "f") (match_operand:SF 3 "gpc_reg_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT" - "* { - return ((TARGET_POWERPC) - ? \"fmadds %0,%1,%2,%3\" - : \"{fma|fmadd} %0,%1,%2,%3\"); -}" + return (TARGET_POWERPC + ? "fmadds %0,%1,%2,%3" + : "{fma|fmadd} %0,%1,%2,%3"); +} [(set_attr "type" "fp") (set_attr "fp_type" "fp_maddsub_s")]) @@ -5876,168 +5875,42 @@ (match_operand:SF 2 "gpc_reg_operand" "f") (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT" - "* { - return ((TARGET_POWERPC) - ? \"fmsubs %0,%1,%2,%3\" - : \"{fms|fmsub} %0,%1,%2,%3\"); -}" + return (TARGET_POWERPC + ? "fmsubs %0,%1,%2,%3" + : "{fms|fmsub} %0,%1,%2,%3"); +} [(set_attr "type" "fp") (set_attr "fp_type" "fp_maddsub_s")]) -(define_insn "*fnmasf4_fpr" +(define_insn "*nfmasf4_fpr" [(set (match_operand:SF 0 "gpc_reg_operand" "=f") (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f") (match_operand:SF 2 "gpc_reg_operand" "f") (match_operand:SF 3 "gpc_reg_operand" "f"))))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT" - "* { - return ((TARGET_POWERPC) - ? \"fnmadds %0,%1,%2,%3\" - : \"{fnma|fnmadd} %0,%1,%2,%3\"); -}" + return (TARGET_POWERPC + ? 
"fnmadds %0,%1,%2,%3" + : "{fnma|fnmadd} %0,%1,%2,%3"); +} [(set_attr "type" "fp") (set_attr "fp_type" "fp_maddsub_s")]) -(define_insn "*fnmssf4_fpr" +(define_insn "*nfmssf4_fpr" [(set (match_operand:SF 0 "gpc_reg_operand" "=f") (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f") (match_operand:SF 2 "gpc_reg_operand" "f") (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f")))))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT" - "* { - return ((TARGET_POWERPC) - ? \"fnmsubs %0,%1,%2,%3\" - : \"{fnms|fnmsub} %0,%1,%2,%3\"); -}" + return (TARGET_POWERPC + ? "fnmsubs %0,%1,%2,%3" + : "{fnms|fnmsub} %0,%1,%2,%3"); +} [(set_attr "type" "fp") (set_attr "fp_type" "fp_maddsub_s")]) -; Fused multiply/add ops created by the combiner -(define_insn "*fmaddsf4_powerpc" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f") - (match_operand:SF 2 "gpc_reg_operand" "f")) - (match_operand:SF 3 "gpc_reg_operand" "f")))] - "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS - && TARGET_SINGLE_FLOAT && TARGET_FUSED_MADD" - "fmadds %0,%1,%2,%3" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_s")]) - -(define_insn "*fmaddsf4_power" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f") - (match_operand:SF 2 "gpc_reg_operand" "f")) - (match_operand:SF 3 "gpc_reg_operand" "f")))] - "! 
TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD" - "{fma|fmadd} %0,%1,%2,%3" - [(set_attr "type" "dmul")]) - -(define_insn "*fmsubsf4_powerpc" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f") - (match_operand:SF 2 "gpc_reg_operand" "f")) - (match_operand:SF 3 "gpc_reg_operand" "f")))] - "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS - && TARGET_SINGLE_FLOAT && TARGET_FUSED_MADD" - "fmsubs %0,%1,%2,%3" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_s")]) - -(define_insn "*fmsubsf4_power" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f") - (match_operand:SF 2 "gpc_reg_operand" "f")) - (match_operand:SF 3 "gpc_reg_operand" "f")))] - "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD" - "{fms|fmsub} %0,%1,%2,%3" - [(set_attr "type" "dmul")]) - -(define_insn "*fnmaddsf4_powerpc_1" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (neg:SF (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f") - (match_operand:SF 2 "gpc_reg_operand" "f")) - (match_operand:SF 3 "gpc_reg_operand" "f"))))] - "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD - && TARGET_SINGLE_FLOAT" - "fnmadds %0,%1,%2,%3" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_s")]) - -(define_insn "*fnmaddsf4_powerpc_2" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (minus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f")) - (match_operand:SF 2 "gpc_reg_operand" "f")) - (match_operand:SF 3 "gpc_reg_operand" "f")))] - "TARGET_POWERPC && TARGET_SINGLE_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD - && ! 
HONOR_SIGNED_ZEROS (SFmode)" - "fnmadds %0,%1,%2,%3" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_s")]) - -(define_insn "*fnmaddsf4_power_1" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (neg:SF (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f") - (match_operand:SF 2 "gpc_reg_operand" "f")) - (match_operand:SF 3 "gpc_reg_operand" "f"))))] - "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD" - "{fnma|fnmadd} %0,%1,%2,%3" - [(set_attr "type" "dmul")]) - -(define_insn "*fnmaddsf4_power_2" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (minus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f")) - (match_operand:SF 2 "gpc_reg_operand" "f")) - (match_operand:SF 3 "gpc_reg_operand" "f")))] - "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD - && ! HONOR_SIGNED_ZEROS (SFmode)" - "{fnma|fnmadd} %0,%1,%2,%3" - [(set_attr "type" "dmul")]) - -(define_insn "*fnmsubsf4_powerpc_1" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (neg:SF (minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f") - (match_operand:SF 2 "gpc_reg_operand" "f")) - (match_operand:SF 3 "gpc_reg_operand" "f"))))] - "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD - && TARGET_SINGLE_FLOAT" - "fnmsubs %0,%1,%2,%3" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_s")]) - -(define_insn "*fnmsubsf4_powerpc_2" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (minus:SF (match_operand:SF 3 "gpc_reg_operand" "f") - (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f") - (match_operand:SF 2 "gpc_reg_operand" "f"))))] - "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD - && TARGET_SINGLE_FLOAT && ! 
HONOR_SIGNED_ZEROS (SFmode)" - "fnmsubs %0,%1,%2,%3" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_s")]) - -(define_insn "*fnmsubsf4_power_1" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (neg:SF (minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f") - (match_operand:SF 2 "gpc_reg_operand" "f")) - (match_operand:SF 3 "gpc_reg_operand" "f"))))] - "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD" - "{fnms|fnmsub} %0,%1,%2,%3" - [(set_attr "type" "dmul")]) - -(define_insn "*fnmsubsf4_power_2" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (minus:SF (match_operand:SF 3 "gpc_reg_operand" "f") - (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f") - (match_operand:SF 2 "gpc_reg_operand" "f"))))] - "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD - && ! HONOR_SIGNED_ZEROS (SFmode)" - "{fnms|fnmsub} %0,%1,%2,%3" - [(set_attr "type" "dmul")]) - (define_expand "sqrtsf2" [(set (match_operand:SF 0 "gpc_reg_operand" "") (sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "")))] @@ -6385,17 +6258,7 @@ [(set_attr "type" "fp")]) ; builtin fma support -; If the user explicitly uses the fma builtin, don't convert this to -; (plus (mult op1 op2) op3) -(define_expand "fmadf4" - [(set (match_operand:DF 0 "gpc_reg_operand" "") - (fma:DF (match_operand:DF 1 "gpc_reg_operand" "") - (match_operand:DF 2 "gpc_reg_operand" "") - (match_operand:DF 3 "gpc_reg_operand" "")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" - "") - -(define_insn "fmadf4_fpr" +(define_insn "*fmadf4_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=f") (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f") (match_operand:DF 2 "gpc_reg_operand" "f") @@ -6417,7 +6280,7 @@ [(set_attr "type" "fp") (set_attr "fp_type" "fp_maddsub_s")]) -(define_insn "*fnmadf4_fpr" +(define_insn "*nfmadf4_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=f") (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f") (match_operand:DF 2 
"gpc_reg_operand" "f") @@ -6428,7 +6291,7 @@ [(set_attr "type" "fp") (set_attr "fp_type" "fp_maddsub_s")]) -(define_insn "*fnmsdf4_fpr" +(define_insn "*nfmsdf4_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=f") (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f") (match_operand:DF 2 "gpc_reg_operand" "f") @@ -6439,73 +6302,6 @@ [(set_attr "type" "fp") (set_attr "fp_type" "fp_maddsub_s")]) -; Fused multiply/add ops created by the combiner -(define_insn "*fmadddf4_fpr" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d") - (match_operand:DF 2 "gpc_reg_operand" "d")) - (match_operand:DF 3 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT - && VECTOR_UNIT_NONE_P (DFmode)" - "{fma|fmadd} %0,%1,%2,%3" - [(set_attr "type" "dmul") - (set_attr "fp_type" "fp_maddsub_d")]) - -(define_insn "*fmsubdf4_fpr" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d") - (match_operand:DF 2 "gpc_reg_operand" "d")) - (match_operand:DF 3 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT - && VECTOR_UNIT_NONE_P (DFmode)" - "{fms|fmsub} %0,%1,%2,%3" - [(set_attr "type" "dmul") - (set_attr "fp_type" "fp_maddsub_d")]) - -(define_insn "*fnmadddf4_fpr_1" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (neg:DF (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d") - (match_operand:DF 2 "gpc_reg_operand" "d")) - (match_operand:DF 3 "gpc_reg_operand" "d"))))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT - && VECTOR_UNIT_NONE_P (DFmode)" - "{fnma|fnmadd} %0,%1,%2,%3" - [(set_attr "type" "dmul") - (set_attr "fp_type" "fp_maddsub_d")]) - -(define_insn "*fnmadddf4_fpr_2" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (minus:DF (mult:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" "d")) - 
(match_operand:DF 2 "gpc_reg_operand" "d")) - (match_operand:DF 3 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT - && ! HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)" - "{fnma|fnmadd} %0,%1,%2,%3" - [(set_attr "type" "dmul") - (set_attr "fp_type" "fp_maddsub_d")]) - -(define_insn "*fnmsubdf4_fpr_1" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (neg:DF (minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d") - (match_operand:DF 2 "gpc_reg_operand" "d")) - (match_operand:DF 3 "gpc_reg_operand" "d"))))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT - && VECTOR_UNIT_NONE_P (DFmode)" - "{fnms|fnmsub} %0,%1,%2,%3" - [(set_attr "type" "dmul") - (set_attr "fp_type" "fp_maddsub_d")]) - -(define_insn "*fnmsubdf4_fpr_2" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (minus:DF (match_operand:DF 3 "gpc_reg_operand" "d") - (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d") - (match_operand:DF 2 "gpc_reg_operand" "d"))))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT - && ! HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)" - "{fnms|fnmsub} %0,%1,%2,%3" - [(set_attr "type" "dmul") - (set_attr "fp_type" "fp_maddsub_d")]) - (define_expand "sqrtdf2" [(set (match_operand:DF 0 "gpc_reg_operand" "") (sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "")))] @@ -16309,6 +16105,73 @@ "bpermd %0,%1,%2" [(set_attr "type" "integer")]) + +;; Builtin fma support. Handle +;; Note that the conditions for expansion are in the FMA_F iterator. + +(define_expand "fma4" + [(set (match_operand:FMA_F 0 "register_operand" "") + (fma:FMA_F + (match_operand:FMA_F 1 "register_operand" "") + (match_operand:FMA_F 2 "register_operand" "") + (match_operand:FMA_F 3 "register_operand" "")))] + "" + "") + +; Altivec only has fma and nfms. 
+(define_expand "fms<mode>4"
+  [(set (match_operand:FMA_F 0 "register_operand" "")
+        (fma:FMA_F
+          (match_operand:FMA_F 1 "register_operand" "")
+          (match_operand:FMA_F 2 "register_operand" "")
+          (neg:FMA_F (match_operand:FMA_F 3 "register_operand" ""))))]
+  "!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+  "")
+
+;; Emit -fma (a, b, -c); if signed zeros are ignored, -(a*b - c) = -a*b + c.
+(define_expand "fnma<mode>4"
+  [(set (match_operand:FMA_F 0 "register_operand" "")
+        (neg:FMA_F
+          (fma:FMA_F
+            (match_operand:FMA_F 1 "register_operand" "")
+            (match_operand:FMA_F 2 "register_operand" "")
+            (neg:FMA_F (match_operand:FMA_F 3 "register_operand" "")))))]
+  "!HONOR_SIGNED_ZEROS (<MODE>mode)"
+  "")
+
+;; Emit -fma (a, b, c); if signed zeros are ignored, -(a*b + c) = -a*b - c.
+(define_expand "fnms<mode>4"
+  [(set (match_operand:FMA_F 0 "register_operand" "")
+        (neg:FMA_F
+          (fma:FMA_F
+            (match_operand:FMA_F 1 "register_operand" "")
+            (match_operand:FMA_F 2 "register_operand" "")
+            (match_operand:FMA_F 3 "register_operand" ""))))]
+  "!HONOR_SIGNED_ZEROS (<MODE>mode) && !VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+  "")
+
+; -fma (a, b, c).  Not an official optab name, but used from builtins.
+(define_expand "nfma<mode>4"
+  [(set (match_operand:FMA_F 0 "register_operand" "")
+        (neg:FMA_F
+          (fma:FMA_F
+            (match_operand:FMA_F 1 "register_operand" "")
+            (match_operand:FMA_F 2 "register_operand" "")
+            (match_operand:FMA_F 3 "register_operand" ""))))]
+  "!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+  "")
+
+; Not an official optab name, but used from builtins.
+(define_expand "nfms<mode>4"
+  [(set (match_operand:FMA_F 0 "register_operand" "")
+        (neg:FMA_F
+          (fma:FMA_F
+            (match_operand:FMA_F 1 "register_operand" "")
+            (match_operand:FMA_F 2 "register_operand" "")
+            (neg:FMA_F (match_operand:FMA_F 3 "register_operand" "")))))]
+  ""
+  "")
+
 (include "sync.md") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index bdfbf782c451..063036ac85c6 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -176,10 +176,6 @@ mavoid-indexed-addresses Target Report Var(TARGET_AVOID_XFORM) Init(-1) Avoid generation of indexed load/store instructions when possible -mfused-madd -Target Report Var(TARGET_FUSED_MADD) Init(1) -Generate fused multiply/add instructions - mtls-markers Target Report Var(tls_markers) Init(1) Mark __tls_get_addr calls with argument info diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 05a498f1a1b0..71961fbc57c4 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -202,16 +202,14 @@ [(set (match_operand:VEC_F 0 "vfloat_operand" "") (mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") (match_operand:VEC_F 2 "vfloat_operand" "")))] - "(VECTOR_UNIT_VSX_P (mode) - || (VECTOR_UNIT_ALTIVEC_P (mode) && TARGET_FUSED_MADD))" - " + "VECTOR_UNIT_VSX_P (mode) || VECTOR_UNIT_ALTIVEC_P (mode)" { if (mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (mode)) { emit_insn (gen_altivec_mulv4sf3 (operands[0], operands[1], operands[2])); DONE; } -}") +}) (define_expand "div3" [(set (match_operand:VEC_F 0 "vfloat_operand" "") diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 4e6898735973..e241e26db896 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -513,51 +513,12 @@ ;; Fused vector multiply/add instructions -;; Note we have a pattern for the multiply/add operations that uses unspec and -;; does not check -mfused-madd to allow users to use these ops when they know -;; they want the fused multiply/add. 
- -;; Fused multiply add. By default expand the FMA into (plus (mult)) to help -;; loop unrolling. Don't do negate multiply ops, because of complications with -;; honoring signed zero and fused-madd. - -(define_expand "vsx_fmadd4" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "") - (plus:VSX_B - (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "") - (match_operand:VSX_B 2 "vsx_register_operand" "")) - (match_operand:VSX_B 3 "vsx_register_operand" "")))] - "VECTOR_UNIT_VSX_P (mode)" -{ - if (!TARGET_FUSED_MADD) - { - emit_insn (gen_vsx_fmadd4_2 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*vsx_fmadd4_1" +(define_insn "*vsx_fma4" [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") - (plus:VSX_B - (mult:VSX_B + (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0")) - (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")))] - "VECTOR_UNIT_VSX_P (mode) && TARGET_FUSED_MADD" - "@ - xmadda %x0,%x1,%x2 - xmaddm %x0,%x1,%x3 - xmadda %x0,%x1,%x2 - xmaddm %x0,%x1,%x3" - [(set_attr "type" "") - (set_attr "fp_type" "")]) - -(define_insn "vsx_fmadd4_2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") - (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0") - (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")))] + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0") + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")))] "VECTOR_UNIT_VSX_P (mode)" "@ xmadda %x0,%x1,%x2 @@ -567,44 +528,13 @@ [(set_attr "type" "") (set_attr "fp_type" "")]) -(define_expand "vsx_fmsub4" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "") - (minus:VSX_B - (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "") - (match_operand:VSX_B 2 "vsx_register_operand" "")) - (match_operand:VSX_B 3 "vsx_register_operand" "")))] - 
"VECTOR_UNIT_VSX_P (mode)" -{ - if (!TARGET_FUSED_MADD) - { - emit_insn (gen_vsx_fmsub4_2 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*vsx_fmsub4_1" +(define_insn "*vsx_fms4" [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") - (minus:VSX_B - (mult:VSX_B + (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0")) - (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")))] - "VECTOR_UNIT_VSX_P (mode) && TARGET_FUSED_MADD" - "@ - xmsuba %x0,%x1,%x2 - xmsubm %x0,%x1,%x3 - xmsuba %x0,%x1,%x2 - xmsubm %x0,%x1,%x3" - [(set_attr "type" "") - (set_attr "fp_type" "")]) - -(define_insn "vsx_fmsub4_2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") - (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0") - (neg:VSX_B - (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa"))))] + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0") + (neg:VSX_B + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa"))))] "VECTOR_UNIT_VSX_P (mode)" "@ xmsuba %x0,%x1,%x2 @@ -614,7 +544,7 @@ [(set_attr "type" "") (set_attr "fp_type" "")]) -(define_insn "vsx_fnmadd4" +(define_insn "*vsx_nfma4" [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") (neg:VSX_B (fma:VSX_B @@ -630,85 +560,15 @@ [(set_attr "type" "") (set_attr "fp_type" "")]) -(define_insn "vsx_fnmadd4_1" +(define_insn "*vsx_nfms4" [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") (neg:VSX_B - (plus:VSX_B - (mult:VSX_B - (match_operand:VSX_B 1 "vsx_register_operand" ",,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0")) - (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa"))))] - "VECTOR_UNIT_VSX_P (mode) && TARGET_FUSED_MADD - && HONOR_SIGNED_ZEROS (DFmode)" - "@ - xnmadda %x0,%x1,%x2 - xnmaddm %x0,%x1,%x3 - xnmadda %x0,%x1,%x2 - xnmaddm %x0,%x1,%x3" - 
[(set_attr "type" "") - (set_attr "fp_type" "")]) - -(define_insn "vsx_fnmadd4_2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") - (minus:VSX_B - (mult:VSX_B - (neg:VSX_B - (match_operand:VSX_B 1 "gpc_reg_operand" ",,wa,wa")) - (match_operand:VSX_B 2 "gpc_reg_operand" ",0,wa,0")) - (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")))] - "VECTOR_UNIT_VSX_P (mode) && TARGET_FUSED_MADD - && !HONOR_SIGNED_ZEROS (DFmode)" - "@ - xnmadda %x0,%x1,%x2 - xnmaddm %x0,%x1,%x3 - xnmadda %x0,%x1,%x2 - xnmaddm %x0,%x1,%x3" - [(set_attr "type" "") - (set_attr "fp_type" "")]) - -(define_insn "vsx_fnmsub4" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") - (neg:VSX_B - (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0") - (neg:VSX_B - (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")))))] - "VECTOR_UNIT_VSX_P (mode)" - "@ - xnmsuba %x0,%x1,%x2 - xnmsubm %x0,%x1,%x3 - xnmsuba %x0,%x1,%x2 - xnmsubm %x0,%x1,%x3" - [(set_attr "type" "") - (set_attr "fp_type" "")]) - -(define_insn "vsx_fnmsub4_1" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") - (neg:VSX_B - (minus:VSX_B - (mult:VSX_B + (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0")) - (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa"))))] - "VECTOR_UNIT_VSX_P (mode) && TARGET_FUSED_MADD - && HONOR_SIGNED_ZEROS (DFmode)" - "@ - xnmsuba %x0,%x1,%x2 - xnmsubm %x0,%x1,%x3 - xnmsuba %x0,%x1,%x2 - xnmsubm %x0,%x1,%x3" - [(set_attr "type" "") - (set_attr "fp_type" "")]) - -(define_insn "vsx_fnmsub4_2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,,?wa,?wa") - (minus:VSX_B - (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa") - (mult:VSX_B - (match_operand:VSX_B 1 "vsx_register_operand" "%,,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0"))))] - "VECTOR_UNIT_VSX_P (mode) && 
TARGET_FUSED_MADD - && !HONOR_SIGNED_ZEROS (DFmode)" + (match_operand:VSX_B 2 "vsx_register_operand" ",0,wa,0") + (neg:VSX_B + (match_operand:VSX_B 3 "vsx_register_operand" "0,,0,wa")))))] + "VECTOR_UNIT_VSX_P (mode)" "@ xnmsuba %x0,%x1,%x2 xnmsubm %x0,%x1,%x3 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 10a7c3e61d87..599a5f2f16ab 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2010-11-16 Richard Henderson + + * gcc.target/powerpc/ppc-fma-2.c: Use -ffp-contract=off. + * gcc.target/powerpc/ppc-fma-4.c: Likewise. + 2010-11-16 Eric Botcazou * gnat.dg/opt12.adb: New test. diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c index a17565b2c909..111b9cb098ed 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c @@ -1,7 +1,7 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O3 -ftree-vectorize -mcpu=power7 -ffast-math -mno-fused-madd" } */ +/* { dg-options "-O3 -ftree-vectorize -mcpu=power7 -ffast-math -ffp-contract=off" } */ /* { dg-final { scan-assembler-times "xvmadd" 2 } } */ /* { dg-final { scan-assembler-times "xsmadd" 1 } } */ /* { dg-final { scan-assembler-times "fmadds" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c index 50e431784770..44da6e76bc46 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c @@ -1,7 +1,7 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_altivec_ok } */ -/* { dg-options "-O3 -ftree-vectorize -mcpu=power6 -maltivec -ffast-math -mno-fused-madd" } */ +/* { dg-options "-O3 -ftree-vectorize -mcpu=power6 -maltivec 
-ffast-math -ffp-contract=off" } */ /* { dg-final { scan-assembler-times "vmaddfp" 1 } } */ /* { dg-final { scan-assembler-times "fmadd " 1 } } */ /* { dg-final { scan-assembler-times "fmadds" 1 } } */