diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 30f66bd4ba70..dfd029051392 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,43 @@ +2006-01-19 Jan Hubicka + H.J. Lu + Evandro Menezes + + * invoke.texi (generic): Document. + (i686): Update. + * config.gcc: Make x86_64-* and i686-* default to generic tuning. + * i386.h (TARGET_GENERIC32, TARGET_GENERIC64, TARGET_GENERIC, + TARGET_USE_INCDEC, TARGET_PAD_RETURNS): New macros. + (x86_use_incdec, x86_pad_returns): New variables. + (TARGET_CPU_DEFAULT_generic): New constant. + (TARGET_CPU_DEFAULT_NAMES): Add generic. + (enum processor_type): Add generic32 and generic64. + * i386.md (cpu attribute): Add generic32/generic64. + (movhi splitter): Behave sanely when both partial_reg_dependency and + partial_reg_stall are set. + (K8 splitters): Enable for generic as well. + * predicates.md (incdec_operand): Use TARGET_USE_INCDEC. + (aligned_operand): Avoid memory mismatch stalls. + * athlon.md: Enable for generic64, new patterns for 128bit moves. + * ppro.md: Enable for generic32. + * i386.c (generic64_cost, generic32_cost): New. + (m_GENERIC32, m_GENERIC64, m_GENERIC): New macros. + (x86_use_leave): Enable for generic64. (x86_use_sahf, + x86_ext_80387_constants): Enable for generic32. (x86_push_memory, + x86_movx, x86_unroll_strlen, x86_deep_branch, x86_use_simode_fiop, + x86_use_cltd, x86_promote_QImode, x86_sub_esp_4, x86_sub_esp_8, + x86_add_esp_4, x86_add_esp_8, x86_integer_DFmode_moves, + x86_partial_reg_dependency, x86_memory_mismatch_stall, + x86_accumulate_outgoing_args, x86_prologue_using_move, + x86_epilogue_using_move, x86_arch_always_fancy_math_387, + x86_sse_partial_reg_dependency, x86_four_jump_limit, x86_schedule): + Enable for generic. + (x86_use_incdec, x86_pad_returns): New. + (override_options): Add generic32 and generic64, translate "generic" + to generic32/generic64 and "i686" to "generic32", refuse + "generic32"/"generic64" as arch target. + (ix86_issue_rate, ix86_adjust_cost): Handle generic as athlon.
+ (ix86_reorg): Honor PAD_RETURNS. + 2006-01-19 Diego Novillo * tree-pretty-print.c (dump_generic_node): Handle diff --git a/gcc/config.gcc b/gcc/config.gcc index 076e71541f4e..252f10c80ade 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -2366,6 +2366,9 @@ if test x$with_cpu = x ; then # A Cirrus ARM variant. with_cpu="ep9312" ;; + i386-*-*) + with_cpu=i386 + ;; i486-*-*) with_cpu=i486 ;; @@ -2417,13 +2420,26 @@ if test x$with_cpu = x ; then pentium_m-*) with_cpu=pentium-m ;; - *) + pentiumpro-*) with_cpu=pentiumpro ;; + *) + with_cpu=generic + ;; esac ;; x86_64-*-*) - with_cpu=k8 + case ${target_noncanonical} in + k8-*|opteron-*|athlon_64-*) + with_cpu=k8 + ;; + nocona-*) + with_cpu=nocona + ;; + *) + with_cpu=generic + ;; + esac ;; alphaev6[78]*-*-*) with_cpu=ev67 @@ -2629,13 +2645,21 @@ case "${target}" in for which in arch cpu tune; do eval "val=\$with_$which" case ${val} in - "" | i386 | i486 \ + i386 | i486 \ | i586 | pentium | pentium-mmx | winchip-c6 | winchip2 \ | c3 | c3-2 | i686 | pentiumpro | pentium2 | pentium3 \ | pentium4 | k6 | k6-2 | k6-3 | athlon | athlon-tbird \ - | athlon-4 | athlon-xp | athlon-mp | k8 | opteron \ - | athlon64 | athlon-fx | prescott | pentium-m \ - | pentium4m | pentium3m| nocona) + | athlon-4 | athlon-xp | athlon-mp \ + | prescott | pentium-m | pentium4m | pentium3m) + case "${target}" in + x86_64-*-*) + echo "CPU given in --with-$which=$val doesn't support 64bit mode." 
1>&2 + exit 1 + ;; + esac + # OK + ;; + "" | k8 | opteron | athlon64 | athlon-fx | nocona | generic) # OK ;; *) diff --git a/gcc/config/i386/athlon.md b/gcc/config/i386/athlon.md index 1029a818196f..86130b778089 100644 --- a/gcc/config/i386/athlon.md +++ b/gcc/config/i386/athlon.md @@ -123,7 +123,7 @@ (define_cpu_unit "athlon-fmul" "athlon_fp") (define_cpu_unit "athlon-fstore" "athlon_fp") (define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)") -(define_reservation "athlon-faddmul" "(athlon-fmul | athlon-fadd)") +(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)") ;; Vector operations usually consume many of pipes. (define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)") @@ -131,26 +131,26 @@ ;; Jump instructions are executed in the branch unit completely transparent to us (define_insn_reservation "athlon_branch" 0 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "ibr")) "athlon-direct,athlon-ieu") (define_insn_reservation "athlon_call" 0 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "call,callv")) "athlon-vector,athlon-ieu") ;; Latency of push operation is 3 cycles, but ESP value is available ;; earlier (define_insn_reservation "athlon_push" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "push")) "athlon-direct,athlon-agu,athlon-store") (define_insn_reservation "athlon_pop" 4 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "pop")) "athlon-vector,athlon-load,athlon-ieu") (define_insn_reservation "athlon_pop_k8" 3 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "pop")) "athlon-double,(athlon-ieu+athlon-load)") (define_insn_reservation "athlon_leave" 3 @@ -158,13 +158,13 @@ (eq_attr "type" "leave")) "athlon-vector,(athlon-ieu+athlon-load)") (define_insn_reservation 
"athlon_leave_k8" 3 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "leave")) "athlon-double,(athlon-ieu+athlon-load)") ;; Lea executes in AGU unit with 2 cycles latency. (define_insn_reservation "athlon_lea" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "lea")) "athlon-direct,athlon-agu,nothing") @@ -176,13 +176,13 @@ "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0") ;; ??? Widening multiply is vector or double. (define_insn_reservation "athlon_imul_k8_DI" 4 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "imul") (and (eq_attr "mode" "DI") (eq_attr "memory" "none,unknown")))) "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0") (define_insn_reservation "athlon_imul_k8" 3 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "imul") (eq_attr "memory" "none,unknown"))) "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0") @@ -192,13 +192,13 @@ (eq_attr "memory" "load,both"))) "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu") (define_insn_reservation "athlon_imul_mem_k8_DI" 7 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "imul") (and (eq_attr "mode" "DI") (eq_attr "memory" "load,both")))) "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu") (define_insn_reservation "athlon_imul_mem_k8" 6 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "imul") (eq_attr "memory" "load,both"))) "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu") @@ -211,59 +211,59 @@ ;; of the other code (define_insn_reservation "athlon_idiv" 6 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "idiv") (eq_attr "memory" "none,unknown"))) "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))") (define_insn_reservation 
"athlon_idiv_mem" 9 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "idiv") (eq_attr "memory" "load,both"))) "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))") ;; The parallelism of string instructions is not documented. Model it same way ;; as idiv to create smaller automata. This probably does not matter much. (define_insn_reservation "athlon_str" 6 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "str") (eq_attr "memory" "load,both,store"))) "athlon-vector,athlon-load,athlon-ieu0*6") (define_insn_reservation "athlon_idirect" 1 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown")))) "athlon-direct,athlon-ieu") (define_insn_reservation "athlon_ivector" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "vector") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown")))) "athlon-vector,athlon-ieu,athlon-ieu") (define_insn_reservation "athlon_idirect_loadmov" 3 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "imov") (eq_attr "memory" "load"))) "athlon-direct,athlon-load") (define_insn_reservation "athlon_idirect_load" 4 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "load")))) "athlon-direct,athlon-load,athlon-ieu") (define_insn_reservation "athlon_ivector_load" 6 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "vector") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "load")))) "athlon-vector,athlon-load,athlon-ieu,athlon-ieu") (define_insn_reservation 
"athlon_idirect_movstore" 1 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "imov") (eq_attr "memory" "store"))) "athlon-direct,athlon-agu,athlon-store") (define_insn_reservation "athlon_idirect_both" 4 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "both")))) @@ -271,7 +271,7 @@ athlon-ieu,athlon-store, athlon-store") (define_insn_reservation "athlon_ivector_both" 6 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "vector") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "both")))) @@ -280,14 +280,14 @@ athlon-ieu, athlon-store") (define_insn_reservation "athlon_idirect_store" 1 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "store")))) "athlon-direct,(athlon-ieu+athlon-agu), athlon-store") (define_insn_reservation "athlon_ivector_store" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "vector") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "store")))) @@ -302,7 +302,7 @@ (eq_attr "mode" "XF")))) "athlon-vector,athlon-fpload2,athlon-fvector*9") (define_insn_reservation "athlon_fldxf_k8" 13 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "fmov") (and (eq_attr "memory" "load") (eq_attr "mode" "XF")))) @@ -314,7 +314,7 @@ (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fany") (define_insn_reservation "athlon_fld_k8" 2 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "fmov") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fstore") @@ -326,7 +326,7 @@ (eq_attr "mode" "XF")))) 
"athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))") (define_insn_reservation "athlon_fstxf_k8" 8 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "fmov") (and (eq_attr "memory" "store,both") (eq_attr "mode" "XF")))) @@ -337,16 +337,16 @@ (eq_attr "memory" "store,both"))) "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") (define_insn_reservation "athlon_fst_k8" 2 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))) "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") (define_insn_reservation "athlon_fist" 4 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "fistp")) "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") (define_insn_reservation "athlon_fmov" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "fmov")) "athlon-direct,athlon-fpsched,athlon-faddmul") (define_insn_reservation "athlon_fadd_load" 4 @@ -355,12 +355,12 @@ (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fadd") (define_insn_reservation "athlon_fadd_load_k8" 6 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "fop") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_fadd" 4 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "fop")) "athlon-direct,athlon-fpsched,athlon-fadd") (define_insn_reservation "athlon_fmul_load" 4 @@ -369,16 +369,16 @@ (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fmul") (define_insn_reservation "athlon_fmul_load_k8" 6 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "fmul") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fmul") (define_insn_reservation 
"athlon_fmul" 4 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "fmul")) "athlon-direct,athlon-fpsched,athlon-fmul") (define_insn_reservation "athlon_fsgn" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "fsgn")) "athlon-direct,athlon-fpsched,athlon-fmul") (define_insn_reservation "athlon_fdiv_load" 24 @@ -387,7 +387,7 @@ (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fmul") (define_insn_reservation "athlon_fdiv_load_k8" 13 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "fdiv") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fmul") @@ -396,16 +396,16 @@ (eq_attr "type" "fdiv")) "athlon-direct,athlon-fpsched,athlon-fmul") (define_insn_reservation "athlon_fdiv_k8" 11 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "fdiv")) "athlon-direct,athlon-fpsched,athlon-fmul") (define_insn_reservation "athlon_fpspc_load" 103 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "fpspc") (eq_attr "memory" "load"))) "athlon-vector,athlon-fpload,athlon-fvector") (define_insn_reservation "athlon_fpspc" 100 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "fpspc")) "athlon-vector,athlon-fpsched,athlon-fvector") (define_insn_reservation "athlon_fcmov_load" 7 @@ -418,12 +418,12 @@ (eq_attr "type" "fcmov")) "athlon-vector,athlon-fpsched,athlon-fvector") (define_insn_reservation "athlon_fcmov_load_k8" 17 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "fcmov") (eq_attr "memory" "load"))) "athlon-vector,athlon-fploadk8,athlon-fvector") (define_insn_reservation "athlon_fcmov_k8" 15 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "fcmov")) "athlon-vector,athlon-fpsched,athlon-fvector") ;; fcomi is vector decoded by uses 
only one pipe. @@ -434,13 +434,13 @@ (eq_attr "memory" "load")))) "athlon-vector,athlon-fpload,athlon-fadd") (define_insn_reservation "athlon_fcomi_load_k8" 5 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "fcmp") (and (eq_attr "athlon_decode" "vector") (eq_attr "memory" "load")))) "athlon-vector,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_fcomi" 3 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "vector") (eq_attr "type" "fcmp"))) "athlon-vector,athlon-fpsched,athlon-fadd") @@ -450,18 +450,18 @@ (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fadd") (define_insn_reservation "athlon_fcom_load_k8" 4 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "fcmp") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_fcom" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "fcmp")) "athlon-direct,athlon-fpsched,athlon-fadd") ;; Never seen by the scheduler because we still don't do post reg-stack ;; scheduling. 
;(define_insn_reservation "athlon_fxch" 2 -; (and (eq_attr "cpu" "athlon,k8") +; (and (eq_attr "cpu" "athlon,k8,generic64") ; (eq_attr "type" "fxch")) ; "athlon-direct,athlon-fpsched,athlon-fany") @@ -477,8 +477,13 @@ (and (eq_attr "type" "ssemov") (match_operand:DF 1 "memory_operand" ""))) "athlon-direct,athlon-fploadk8,athlon-fstore") +(define_insn_reservation "athlon_movsd_load_generic64" 2 + (and (eq_attr "cpu" "generic64") + (and (eq_attr "type" "ssemov") + (match_operand:DF 1 "memory_operand" ""))) + "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)") (define_insn_reservation "athlon_movaps_load_k8" 2 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "load")))) @@ -496,7 +501,7 @@ (eq_attr "memory" "load")))) "athlon-vector,athlon-fpload,(athlon-fany*2)") (define_insn_reservation "athlon_movss_load_k8" 1 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "SF,DI") (eq_attr "memory" "load")))) @@ -507,57 +512,57 @@ (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fany") (define_insn_reservation "athlon_mmxsseld_k8" 2 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fstore") (define_insn_reservation "athlon_mmxssest" 3 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "mmxmov,ssemov") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "store,both")))) "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)") (define_insn_reservation "athlon_mmxssest_k8" 3 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "mmxmov,ssemov") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "store,both")))) 
"athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)") (define_insn_reservation "athlon_mmxssest_short" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "store,both"))) "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") -(define_insn_reservation "athlon_movaps" 2 - (and (eq_attr "cpu" "k8") +(define_insn_reservation "athlon_movaps_k8" 2 + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "ssemov") (eq_attr "mode" "V4SF,V2DF,TI"))) - "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-faddmul)") -(define_insn_reservation "athlon_movaps_k8" 2 + "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))") +(define_insn_reservation "athlon_movaps" 2 (and (eq_attr "cpu" "athlon") (and (eq_attr "type" "ssemov") (eq_attr "mode" "V4SF,V2DF,TI"))) "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)") (define_insn_reservation "athlon_mmxssemov" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "mmxmov,ssemov")) "athlon-direct,athlon-fpsched,athlon-faddmul") (define_insn_reservation "athlon_mmxmul_load" 4 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "mmxmul") (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fmul") (define_insn_reservation "athlon_mmxmul" 3 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "mmxmul")) "athlon-direct,athlon-fpsched,athlon-fmul") (define_insn_reservation "athlon_mmx_load" 3 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "unit" "mmx") (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-faddmul") (define_insn_reservation "athlon_mmx" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "unit" "mmx")) 
"athlon-direct,athlon-fpsched,athlon-faddmul") ;; SSE operations are handled by the i387 unit as well. The latency @@ -569,7 +574,7 @@ (eq_attr "memory" "load"))) "athlon-vector,athlon-fpload2,(athlon-fmul*2)") (define_insn_reservation "athlon_sselog_load_k8" 5 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "sselog,sselog1") (eq_attr "memory" "load"))) "athlon-double,athlon-fpload2k8,(athlon-fmul*2)") @@ -578,7 +583,7 @@ (eq_attr "type" "sselog,sselog1")) "athlon-vector,athlon-fpsched,athlon-fmul*2") (define_insn_reservation "athlon_sselog_k8" 3 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "sselog,sselog1")) "athlon-double,athlon-fpsched,athlon-fmul") ;; ??? pcmp executes in addmul, probably not worthwhile to bother about that. @@ -589,13 +594,13 @@ (eq_attr "memory" "load")))) "athlon-direct,athlon-fpload,athlon-fadd") (define_insn_reservation "athlon_ssecmp_load_k8" 4 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "ssecmp") (and (eq_attr "mode" "SF,DF,DI,TI") (eq_attr "memory" "load")))) "athlon-direct,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_ssecmp" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "ssecmp") (eq_attr "mode" "SF,DF,DI,TI"))) "athlon-direct,athlon-fpsched,athlon-fadd") @@ -605,7 +610,7 @@ (eq_attr "memory" "load"))) "athlon-vector,athlon-fpload2,(athlon-fadd*2)") (define_insn_reservation "athlon_ssecmpvector_load_k8" 5 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "ssecmp") (eq_attr "memory" "load"))) "athlon-double,athlon-fpload2k8,(athlon-fadd*2)") @@ -614,7 +619,7 @@ (eq_attr "type" "ssecmp")) "athlon-vector,athlon-fpsched,(athlon-fadd*2)") (define_insn_reservation "athlon_ssecmpvector_k8" 3 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "ssecmp")) 
"athlon-double,athlon-fpsched,(athlon-fadd*2)") (define_insn_reservation "athlon_ssecomi_load" 4 @@ -623,12 +628,12 @@ (eq_attr "memory" "load"))) "athlon-vector,athlon-fpload,athlon-fadd") (define_insn_reservation "athlon_ssecomi_load_k8" 6 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "ssecomi") (eq_attr "memory" "load"))) "athlon-vector,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_ssecomi" 4 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "ssecmp")) "athlon-vector,athlon-fpsched,athlon-fadd") (define_insn_reservation "athlon_sseadd_load" 4 @@ -638,13 +643,13 @@ (eq_attr "memory" "load")))) "athlon-direct,athlon-fpload,athlon-fadd") (define_insn_reservation "athlon_sseadd_load_k8" 6 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "sseadd") (and (eq_attr "mode" "SF,DF,DI") (eq_attr "memory" "load")))) "athlon-direct,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_sseadd" 4 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "sseadd") (eq_attr "mode" "SF,DF,DI"))) "athlon-direct,athlon-fpsched,athlon-fadd") @@ -654,7 +659,7 @@ (eq_attr "memory" "load"))) "athlon-vector,athlon-fpload2,(athlon-fadd*2)") (define_insn_reservation "athlon_sseaddvector_load_k8" 7 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "sseadd") (eq_attr "memory" "load"))) "athlon-double,athlon-fpload2k8,(athlon-fadd*2)") @@ -663,7 +668,7 @@ (eq_attr "type" "sseadd")) "athlon-vector,athlon-fpsched,(athlon-fadd*2)") (define_insn_reservation "athlon_sseaddvector_k8" 5 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "sseadd")) "athlon-double,athlon-fpsched,(athlon-fadd*2)") @@ -673,28 +678,28 @@ ;; cvtss2sd (define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4 - (and (eq_attr "cpu" "k8,athlon") + (and (eq_attr "cpu" 
"k8,athlon,generic64") (and (eq_attr "type" "ssecvt") (and (eq_attr "athlon_decode" "direct") (and (eq_attr "mode" "DF") (eq_attr "memory" "load"))))) "athlon-direct,athlon-fploadk8,athlon-fstore") (define_insn_reservation "athlon_ssecvt_cvtss2sd" 2 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "ssecvt") (and (eq_attr "athlon_decode" "direct") (eq_attr "mode" "DF")))) "athlon-direct,athlon-fpsched,athlon-fstore") ;; cvtps2pd. Model same way the other double decoded FP conversions. (define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5 - (and (eq_attr "cpu" "k8,athlon") + (and (eq_attr "cpu" "k8,athlon,generic64") (and (eq_attr "type" "ssecvt") (and (eq_attr "athlon_decode" "double") (and (eq_attr "mode" "V2DF,V4SF,TI") (eq_attr "memory" "load"))))) "athlon-double,athlon-fpload2k8,(athlon-fstore*2)") (define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3 - (and (eq_attr "cpu" "k8,athlon") + (and (eq_attr "cpu" "k8,athlon,generic64") (and (eq_attr "type" "ssecvt") (and (eq_attr "athlon_decode" "double") (eq_attr "mode" "V2DF,V4SF,TI")))) @@ -717,7 +722,7 @@ (eq_attr "memory" "load"))))) "athlon-vector,athlon-fpload,(athlon-fstore*2)") (define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "sseicvt") (and (eq_attr "athlon_decode" "double") (and (eq_attr "mode" "SF,DF") @@ -725,7 +730,7 @@ "athlon-double,athlon-fploadk8,(athlon-fstore*2)") ;; cvtsi2sd reg,reg is double decoded (vector on Athlon) (define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11 - (and (eq_attr "cpu" "k8,athlon") + (and (eq_attr "cpu" "k8,athlon,generic64") (and (eq_attr "type" "sseicvt") (and (eq_attr "athlon_decode" "double") (and (eq_attr "mode" "SF,DF") @@ -733,7 +738,7 @@ "athlon-double,athlon-fploadk8,athlon-fstore") ;; cvtsi2ss reg, reg is doublepath (define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14 - (and (eq_attr "cpu" "athlon,k8") + 
(and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "sseicvt") (and (eq_attr "athlon_decode" "vector") (and (eq_attr "mode" "SF,DF") @@ -741,7 +746,7 @@ "athlon-vector,athlon-fploadk8,(athlon-fvector*2)") ;; cvtsd2ss mem,reg is doublepath, troughput unknown, latency 9 (define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9 - (and (eq_attr "cpu" "k8,athlon") + (and (eq_attr "cpu" "k8,athlon,generic64") (and (eq_attr "type" "ssecvt") (and (eq_attr "athlon_decode" "double") (and (eq_attr "mode" "SF") @@ -749,14 +754,14 @@ "athlon-double,athlon-fploadk8,(athlon-fstore*3)") ;; cvtsd2ss reg,reg is vectorpath, troughput unknown, latency 12 (define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "ssecvt") (and (eq_attr "athlon_decode" "vector") (and (eq_attr "mode" "SF") (eq_attr "memory" "none"))))) "athlon-vector,athlon-fpsched,(athlon-fvector*3)") (define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "ssecvt") (and (eq_attr "athlon_decode" "vector") (and (eq_attr "mode" "V4SF,V2DF,TI") @@ -765,7 +770,7 @@ ;; cvtpd2ps mem,reg is vectorpath, troughput unknown, latency 10 ;; ??? Why it is fater than cvtsd2ss? 
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "ssecvt") (and (eq_attr "athlon_decode" "vector") (and (eq_attr "mode" "V4SF,V2DF,TI") @@ -773,7 +778,7 @@ "athlon-vector,athlon-fpsched,athlon-fvector*2") ;; cvtsd2si mem,reg is doublepath, troughput 1, latency 9 (define_insn_reservation "athlon_secvt_cvtsX2si_load" 9 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "sseicvt") (and (eq_attr "athlon_decode" "vector") (and (eq_attr "mode" "SI,DI") @@ -788,7 +793,7 @@ (eq_attr "memory" "none"))))) "athlon-vector,athlon-fpsched,athlon-fvector") (define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "sseicvt") (and (eq_attr "athlon_decode" "double") (and (eq_attr "mode" "SI,DI") @@ -803,13 +808,13 @@ (eq_attr "memory" "load")))) "athlon-direct,athlon-fpload,athlon-fmul") (define_insn_reservation "athlon_ssemul_load_k8" 6 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "ssemul") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "athlon-direct,athlon-fploadk8,athlon-fmul") (define_insn_reservation "athlon_ssemul" 4 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "ssemul") (eq_attr "mode" "SF,DF"))) "athlon-direct,athlon-fpsched,athlon-fmul") @@ -819,7 +824,7 @@ (eq_attr "memory" "load"))) "athlon-vector,athlon-fpload2,(athlon-fmul*2)") (define_insn_reservation "athlon_ssemulvector_load_k8" 7 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "ssemul") (eq_attr "memory" "load"))) "athlon-double,athlon-fpload2k8,(athlon-fmul*2)") @@ -828,7 +833,7 @@ (eq_attr "type" "ssemul")) "athlon-vector,athlon-fpsched,(athlon-fmul*2)") (define_insn_reservation "athlon_ssemulvector_k8" 5 - (and (eq_attr "cpu" "k8") 
+ (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "ssemul")) "athlon-double,athlon-fpsched,(athlon-fmul*2)") ;; divsd timings. divss is faster @@ -839,13 +844,13 @@ (eq_attr "memory" "load")))) "athlon-direct,athlon-fpload,athlon-fmul*17") (define_insn_reservation "athlon_ssediv_load_k8" 22 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "athlon-direct,athlon-fploadk8,athlon-fmul*17") (define_insn_reservation "athlon_ssediv" 20 - (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "ssediv") (eq_attr "mode" "SF,DF"))) "athlon-direct,athlon-fpsched,athlon-fmul*17") @@ -855,7 +860,7 @@ (eq_attr "memory" "load"))) "athlon-vector,athlon-fpload2,athlon-fmul*34") (define_insn_reservation "athlon_ssedivvector_load_k8" 35 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "ssediv") (eq_attr "memory" "load"))) "athlon-double,athlon-fpload2k8,athlon-fmul*34") @@ -864,6 +869,6 @@ (eq_attr "type" "ssediv")) "athlon-vector,athlon-fmul*34") (define_insn_reservation "athlon_ssedivvector_k8" 39 - (and (eq_attr "cpu" "k8") + (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "ssediv")) "athlon-double,athlon-fmul*34") diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e45d2a266bd5..107545ec1855 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -587,6 +587,118 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ }; +/* Generic64 should produce code tuned for Nocona and K8. */ +static const +struct processor_costs generic64_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + /* On all chips taken into consideration lea is 2 cycles and more. With + this cost however our current implementation of synth_mult results in + use of unnecesary temporary registers causing regression on several + SPECfp benchmarks. 
*/ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of loading integer registers */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value + is increased to perhaps more appropriate value of 5. */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. 
*/ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ +}; + +/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */ +static const +struct processor_costs generic32_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. 
*/ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ +}; + const struct processor_costs *ix86_cost = &pentium_cost; /* Processor feature/optimization bitmasks. */ @@ -600,52 +712,81 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_K8 (1<