mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-19 00:01:21 +08:00
invoke.texi (generic): Document (i686) Update.
* invoke.texi (generic): Document (i686) Update. * config.gcc: Make x86_64-* and i686-* default to generic tunning. * i386.h (TARGET_GENERIC32, TARGET_GENERIC64, TARGET_GENERIC, TARGET_USE_INCDEC, TARGET_PAD_RETURNS): New macros. (x86_use_incdec, x86_pad_returns): New variables (TARGET_CPU_DEFAULT_generic): New constant (TARGET_CPU_DEFAULT_NAMES): Add generic. (enum processor_type): Add generic32 and generic64. * i386.md (cpu attribute): Add generic32/generic64 (movhi splitter): Behave sanely when both partial_reg_dependency and partial_reg_stall are set. (K8 splitters): Enable for generic as well. * predicates.md (incdec_operand): Use TARGET_INCDEC (aligned_operand): Avoid memory mismatch stalls. * athlon.md: Enable for generic64, new patterns for 128bit moves. * ppro.md: Enable for generic32 * i386.c (generic64_cost, generic32_cost): New. (m_GENERIC32, m_GENERIC64, m_GENERIC): New macros. (x86_use_leave): Enable for generic64. (x86_use_sahf, x86_ext_80387_constants): Enable for generic32. (x86_push_memory, x86_movx, x86_unroll_strlen, x86_deep_branch, x86_use_simode_fiop, x86_use_cltd, x86_promote_QImode, x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8, x86_integer_DFmode_moves, x86_partial_reg_dependency, x86_memory_mismatch_stall, x86_accumulate_outgoing_args, x86_prologue_using_move, x86_epilogue_using_move, x86_arch_always_fancy_math_387, x86_sse_partial_reg_dependency, x86_four_jump_limit, x86_schedule): Enable for generic. (x86_use_incdec, x86_pad_returns): New. (override_options): Add generic32 and generic64, translate "generic" to generic32/generic64 and "i686" to "generic32", refuse "generic32"/"generic64" as arch target. (ix86_issue_rate, ix86_adjust_cost): Handle generic as athlon. (ix86_reorg): Honor PAD_RETURNS. Co-Authored-By: Evandro Menezes <evandro.menezes@amd.com> Co-Authored-By: H.J. Lu <hongjiu.lu@intel.com> From-SVN: r109971
This commit is contained in:
parent
d103fa240f
commit
d326eaf06a
@ -1,3 +1,43 @@
|
||||
2006-01-19 Jan Hubicka <jh@suse.cz>
|
||||
H.J. Lu <hongjiu.lu@intel.com>
|
||||
Evandro Menezes <evandro.menezes@amd.com>
|
||||
|
||||
* invoke.texi (generic): Document
|
||||
(i686) Update.
|
||||
* config.gcc: Make x86_64-* and i686-* default to generic tuning.
|
||||
* i386.h (TARGET_GENERIC32, TARGET_GENERIC64, TARGET_GENERIC,
|
||||
TARGET_USE_INCDEC, TARGET_PAD_RETURNS): New macros.
|
||||
(x86_use_incdec, x86_pad_returns): New variables
|
||||
(TARGET_CPU_DEFAULT_generic): New constant
|
||||
(TARGET_CPU_DEFAULT_NAMES): Add generic.
|
||||
(enum processor_type): Add generic32 and generic64.
|
||||
* i386.md (cpu attribute): Add generic32/generic64
|
||||
(movhi splitter): Behave sanely when both partial_reg_dependency and
|
||||
partial_reg_stall are set.
|
||||
(K8 splitters): Enable for generic as well.
|
||||
* predicates.md (incdec_operand): Use TARGET_USE_INCDEC
|
||||
(aligned_operand): Avoid memory mismatch stalls.
|
||||
* athlon.md: Enable for generic64, new patterns for 128bit moves.
|
||||
* ppro.md: Enable for generic32
|
||||
* i386.c (generic64_cost, generic32_cost): New.
|
||||
(m_GENERIC32, m_GENERIC64, m_GENERIC): New macros.
|
||||
(x86_use_leave): Enable for generic64. (x86_use_sahf,
|
||||
x86_ext_80387_constants): Enable for generic32. (x86_push_memory,
|
||||
x86_movx, x86_unroll_strlen, x86_deep_branch, x86_use_simode_fiop,
|
||||
x86_use_cltd, x86_promote_QImode, x86_sub_esp_4, x86_sub_esp_8,
|
||||
x86_add_esp_4, x86_add_esp_8, x86_integer_DFmode_moves,
|
||||
x86_partial_reg_dependency, x86_memory_mismatch_stall,
|
||||
x86_accumulate_outgoing_args, x86_prologue_using_move,
|
||||
x86_epilogue_using_move, x86_arch_always_fancy_math_387,
|
||||
x86_sse_partial_reg_dependency, x86_four_jump_limit, x86_schedule):
|
||||
Enable for generic.
|
||||
(x86_use_incdec, x86_pad_returns): New.
|
||||
(override_options): Add generic32 and generic64, translate "generic"
|
||||
to generic32/generic64 and "i686" to "generic32", refuse
|
||||
"generic32"/"generic64" as arch target.
|
||||
(ix86_issue_rate, ix86_adjust_cost): Handle generic as athlon.
|
||||
(ix86_reorg): Honor PAD_RETURNS.
|
||||
|
||||
2006-01-19 Diego Novillo <dnovillo@redhat.com>
|
||||
|
||||
* tree-pretty-print.c (dump_generic_node): Handle
|
||||
|
@ -2366,6 +2366,9 @@ if test x$with_cpu = x ; then
|
||||
# A Cirrus ARM variant.
|
||||
with_cpu="ep9312"
|
||||
;;
|
||||
i386-*-*)
|
||||
with_cpu=i386
|
||||
;;
|
||||
i486-*-*)
|
||||
with_cpu=i486
|
||||
;;
|
||||
@ -2417,13 +2420,26 @@ if test x$with_cpu = x ; then
|
||||
pentium_m-*)
|
||||
with_cpu=pentium-m
|
||||
;;
|
||||
*)
|
||||
pentiumpro-*)
|
||||
with_cpu=pentiumpro
|
||||
;;
|
||||
*)
|
||||
with_cpu=generic
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
x86_64-*-*)
|
||||
with_cpu=k8
|
||||
case ${target_noncanonical} in
|
||||
k8-*|opteron-*|athlon_64-*)
|
||||
with_cpu=k8
|
||||
;;
|
||||
nocona-*)
|
||||
with_cpu=nocona
|
||||
;;
|
||||
*)
|
||||
with_cpu=generic
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
alphaev6[78]*-*-*)
|
||||
with_cpu=ev67
|
||||
@ -2629,13 +2645,21 @@ case "${target}" in
|
||||
for which in arch cpu tune; do
|
||||
eval "val=\$with_$which"
|
||||
case ${val} in
|
||||
"" | i386 | i486 \
|
||||
i386 | i486 \
|
||||
| i586 | pentium | pentium-mmx | winchip-c6 | winchip2 \
|
||||
| c3 | c3-2 | i686 | pentiumpro | pentium2 | pentium3 \
|
||||
| pentium4 | k6 | k6-2 | k6-3 | athlon | athlon-tbird \
|
||||
| athlon-4 | athlon-xp | athlon-mp | k8 | opteron \
|
||||
| athlon64 | athlon-fx | prescott | pentium-m \
|
||||
| pentium4m | pentium3m| nocona)
|
||||
| athlon-4 | athlon-xp | athlon-mp \
|
||||
| prescott | pentium-m | pentium4m | pentium3m)
|
||||
case "${target}" in
|
||||
x86_64-*-*)
|
||||
echo "CPU given in --with-$which=$val doesn't support 64bit mode." 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
# OK
|
||||
;;
|
||||
"" | k8 | opteron | athlon64 | athlon-fx | nocona | generic)
|
||||
# OK
|
||||
;;
|
||||
*)
|
||||
|
@ -123,7 +123,7 @@
|
||||
(define_cpu_unit "athlon-fmul" "athlon_fp")
|
||||
(define_cpu_unit "athlon-fstore" "athlon_fp")
|
||||
(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
|
||||
(define_reservation "athlon-faddmul" "(athlon-fmul | athlon-fadd)")
|
||||
(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
|
||||
|
||||
;; Vector operations usually consume many of the pipes.
|
||||
(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
|
||||
@ -131,26 +131,26 @@
|
||||
|
||||
;; Jump instructions are executed in the branch unit completely transparent to us
|
||||
(define_insn_reservation "athlon_branch" 0
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "ibr"))
|
||||
"athlon-direct,athlon-ieu")
|
||||
(define_insn_reservation "athlon_call" 0
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "call,callv"))
|
||||
"athlon-vector,athlon-ieu")
|
||||
|
||||
;; Latency of push operation is 3 cycles, but ESP value is available
|
||||
;; earlier
|
||||
(define_insn_reservation "athlon_push" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "push"))
|
||||
"athlon-direct,athlon-agu,athlon-store")
|
||||
(define_insn_reservation "athlon_pop" 4
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "pop"))
|
||||
"athlon-vector,athlon-load,athlon-ieu")
|
||||
(define_insn_reservation "athlon_pop_k8" 3
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "pop"))
|
||||
"athlon-double,(athlon-ieu+athlon-load)")
|
||||
(define_insn_reservation "athlon_leave" 3
|
||||
@ -158,13 +158,13 @@
|
||||
(eq_attr "type" "leave"))
|
||||
"athlon-vector,(athlon-ieu+athlon-load)")
|
||||
(define_insn_reservation "athlon_leave_k8" 3
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "leave"))
|
||||
"athlon-double,(athlon-ieu+athlon-load)")
|
||||
|
||||
;; Lea executes in AGU unit with 2 cycles latency.
|
||||
(define_insn_reservation "athlon_lea" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "lea"))
|
||||
"athlon-direct,athlon-agu,nothing")
|
||||
|
||||
@ -176,13 +176,13 @@
|
||||
"athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
|
||||
;; ??? Widening multiply is vector or double.
|
||||
(define_insn_reservation "athlon_imul_k8_DI" 4
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "imul")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
|
||||
(define_insn_reservation "athlon_imul_k8" 3
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "imul")
|
||||
(eq_attr "memory" "none,unknown")))
|
||||
"athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
|
||||
@ -192,13 +192,13 @@
|
||||
(eq_attr "memory" "load,both")))
|
||||
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
|
||||
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "imul")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(eq_attr "memory" "load,both"))))
|
||||
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
|
||||
(define_insn_reservation "athlon_imul_mem_k8" 6
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "imul")
|
||||
(eq_attr "memory" "load,both")))
|
||||
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
|
||||
@ -211,59 +211,59 @@
|
||||
;; of the other code
|
||||
|
||||
(define_insn_reservation "athlon_idiv" 6
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(eq_attr "memory" "none,unknown")))
|
||||
"athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
|
||||
(define_insn_reservation "athlon_idiv_mem" 9
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(eq_attr "memory" "load,both")))
|
||||
"athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
|
||||
;; The parallelism of string instructions is not documented. Model it the same way
|
||||
;; as idiv to create smaller automata. This probably does not matter much.
|
||||
(define_insn_reservation "athlon_str" 6
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "str")
|
||||
(eq_attr "memory" "load,both,store")))
|
||||
"athlon-vector,athlon-load,athlon-ieu0*6")
|
||||
|
||||
(define_insn_reservation "athlon_idirect" 1
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"athlon-direct,athlon-ieu")
|
||||
(define_insn_reservation "athlon_ivector" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"athlon-vector,athlon-ieu,athlon-ieu")
|
||||
(define_insn_reservation "athlon_idirect_loadmov" 3
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "imov")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-load")
|
||||
(define_insn_reservation "athlon_idirect_load" 4
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-load,athlon-ieu")
|
||||
(define_insn_reservation "athlon_ivector_load" 6
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
|
||||
(define_insn_reservation "athlon_idirect_movstore" 1
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "imov")
|
||||
(eq_attr "memory" "store")))
|
||||
"athlon-direct,athlon-agu,athlon-store")
|
||||
(define_insn_reservation "athlon_idirect_both" 4
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "both"))))
|
||||
@ -271,7 +271,7 @@
|
||||
athlon-ieu,athlon-store,
|
||||
athlon-store")
|
||||
(define_insn_reservation "athlon_ivector_both" 6
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "both"))))
|
||||
@ -280,14 +280,14 @@
|
||||
athlon-ieu,
|
||||
athlon-store")
|
||||
(define_insn_reservation "athlon_idirect_store" 1
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "store"))))
|
||||
"athlon-direct,(athlon-ieu+athlon-agu),
|
||||
athlon-store")
|
||||
(define_insn_reservation "athlon_ivector_store" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "store"))))
|
||||
@ -302,7 +302,7 @@
|
||||
(eq_attr "mode" "XF"))))
|
||||
"athlon-vector,athlon-fpload2,athlon-fvector*9")
|
||||
(define_insn_reservation "athlon_fldxf_k8" 13
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "mode" "XF"))))
|
||||
@ -314,7 +314,7 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fany")
|
||||
(define_insn_reservation "athlon_fld_k8" 2
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fstore")
|
||||
@ -326,7 +326,7 @@
|
||||
(eq_attr "mode" "XF"))))
|
||||
"athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
|
||||
(define_insn_reservation "athlon_fstxf_k8" 8
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(and (eq_attr "memory" "store,both")
|
||||
(eq_attr "mode" "XF"))))
|
||||
@ -337,16 +337,16 @@
|
||||
(eq_attr "memory" "store,both")))
|
||||
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
|
||||
(define_insn_reservation "athlon_fst_k8" 2
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "store,both")))
|
||||
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
|
||||
(define_insn_reservation "athlon_fist" 4
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "fistp"))
|
||||
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
|
||||
(define_insn_reservation "athlon_fmov" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "fmov"))
|
||||
"athlon-direct,athlon-fpsched,athlon-faddmul")
|
||||
(define_insn_reservation "athlon_fadd_load" 4
|
||||
@ -355,12 +355,12 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fadd_load_k8" 6
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "fop")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fadd" 4
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "fop"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fmul_load" 4
|
||||
@ -369,16 +369,16 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fmul_load_k8" 6
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "fmul")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fmul" 4
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "fmul"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fsgn" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "fsgn"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fdiv_load" 24
|
||||
@ -387,7 +387,7 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fdiv_load_k8" 13
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "fdiv")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fmul")
|
||||
@ -396,16 +396,16 @@
|
||||
(eq_attr "type" "fdiv"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fdiv_k8" 11
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "fdiv"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fpspc_load" 103
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "fpspc")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fpload,athlon-fvector")
|
||||
(define_insn_reservation "athlon_fpspc" 100
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "fpspc"))
|
||||
"athlon-vector,athlon-fpsched,athlon-fvector")
|
||||
(define_insn_reservation "athlon_fcmov_load" 7
|
||||
@ -418,12 +418,12 @@
|
||||
(eq_attr "type" "fcmov"))
|
||||
"athlon-vector,athlon-fpsched,athlon-fvector")
|
||||
(define_insn_reservation "athlon_fcmov_load_k8" 17
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "fcmov")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fploadk8,athlon-fvector")
|
||||
(define_insn_reservation "athlon_fcmov_k8" 15
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "fcmov"))
|
||||
"athlon-vector,athlon-fpsched,athlon-fvector")
|
||||
;; fcomi is vector decoded but uses only one pipe.
|
||||
@ -434,13 +434,13 @@
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-vector,athlon-fpload,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fcomi_load_k8" 5
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-vector,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fcomi" 3
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(eq_attr "type" "fcmp")))
|
||||
"athlon-vector,athlon-fpsched,athlon-fadd")
|
||||
@ -450,18 +450,18 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fcom_load_k8" 4
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fcom" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "fcmp"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fadd")
|
||||
;; Never seen by the scheduler because we still don't do post reg-stack
|
||||
;; scheduling.
|
||||
;(define_insn_reservation "athlon_fxch" 2
|
||||
; (and (eq_attr "cpu" "athlon,k8")
|
||||
; (and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
; (eq_attr "type" "fxch"))
|
||||
; "athlon-direct,athlon-fpsched,athlon-fany")
|
||||
|
||||
@ -477,8 +477,13 @@
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(match_operand:DF 1 "memory_operand" "")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fstore")
|
||||
(define_insn_reservation "athlon_movsd_load_generic64" 2
|
||||
(and (eq_attr "cpu" "generic64")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(match_operand:DF 1 "memory_operand" "")))
|
||||
"athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)")
|
||||
(define_insn_reservation "athlon_movaps_load_k8" 2
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(eq_attr "memory" "load"))))
|
||||
@ -496,7 +501,7 @@
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-vector,athlon-fpload,(athlon-fany*2)")
|
||||
(define_insn_reservation "athlon_movss_load_k8" 1
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "mode" "SF,DI")
|
||||
(eq_attr "memory" "load"))))
|
||||
@ -507,57 +512,57 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fany")
|
||||
(define_insn_reservation "athlon_mmxsseld_k8" 2
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "mmxmov,ssemov")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fstore")
|
||||
(define_insn_reservation "athlon_mmxssest" 3
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "mmxmov,ssemov")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(eq_attr "memory" "store,both"))))
|
||||
"athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
|
||||
(define_insn_reservation "athlon_mmxssest_k8" 3
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "mmxmov,ssemov")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(eq_attr "memory" "store,both"))))
|
||||
"athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
|
||||
(define_insn_reservation "athlon_mmxssest_short" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "mmxmov,ssemov")
|
||||
(eq_attr "memory" "store,both")))
|
||||
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
|
||||
(define_insn_reservation "athlon_movaps" 2
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(define_insn_reservation "athlon_movaps_k8" 2
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(eq_attr "mode" "V4SF,V2DF,TI")))
|
||||
"athlon-double,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
|
||||
(define_insn_reservation "athlon_movaps_k8" 2
|
||||
"athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))")
|
||||
(define_insn_reservation "athlon_movaps" 2
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(eq_attr "mode" "V4SF,V2DF,TI")))
|
||||
"athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
|
||||
(define_insn_reservation "athlon_mmxssemov" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "mmxmov,ssemov"))
|
||||
"athlon-direct,athlon-fpsched,athlon-faddmul")
|
||||
(define_insn_reservation "athlon_mmxmul_load" 4
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "mmxmul")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fmul")
|
||||
(define_insn_reservation "athlon_mmxmul" 3
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "mmxmul"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
(define_insn_reservation "athlon_mmx_load" 3
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "unit" "mmx")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-faddmul")
|
||||
(define_insn_reservation "athlon_mmx" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "unit" "mmx"))
|
||||
"athlon-direct,athlon-fpsched,athlon-faddmul")
|
||||
;; SSE operations are handled by the i387 unit as well. The latency
|
||||
@ -569,7 +574,7 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fpload2,(athlon-fmul*2)")
|
||||
(define_insn_reservation "athlon_sselog_load_k8" 5
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "sselog,sselog1")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
|
||||
@ -578,7 +583,7 @@
|
||||
(eq_attr "type" "sselog,sselog1"))
|
||||
"athlon-vector,athlon-fpsched,athlon-fmul*2")
|
||||
(define_insn_reservation "athlon_sselog_k8" 3
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "sselog,sselog1"))
|
||||
"athlon-double,athlon-fpsched,athlon-fmul")
|
||||
;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
|
||||
@ -589,13 +594,13 @@
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fpload,athlon-fadd")
|
||||
(define_insn_reservation "athlon_ssecmp_load_k8" 4
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "ssecmp")
|
||||
(and (eq_attr "mode" "SF,DF,DI,TI")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_ssecmp" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "ssecmp")
|
||||
(eq_attr "mode" "SF,DF,DI,TI")))
|
||||
"athlon-direct,athlon-fpsched,athlon-fadd")
|
||||
@ -605,7 +610,7 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fpload2,(athlon-fadd*2)")
|
||||
(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "ssecmp")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
|
||||
@ -614,7 +619,7 @@
|
||||
(eq_attr "type" "ssecmp"))
|
||||
"athlon-vector,athlon-fpsched,(athlon-fadd*2)")
|
||||
(define_insn_reservation "athlon_ssecmpvector_k8" 3
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "ssecmp"))
|
||||
"athlon-double,athlon-fpsched,(athlon-fadd*2)")
|
||||
(define_insn_reservation "athlon_ssecomi_load" 4
|
||||
@ -623,12 +628,12 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fpload,athlon-fadd")
|
||||
(define_insn_reservation "athlon_ssecomi_load_k8" 6
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "ssecomi")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_ssecomi" 4
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "ssecmp"))
|
||||
"athlon-vector,athlon-fpsched,athlon-fadd")
|
||||
(define_insn_reservation "athlon_sseadd_load" 4
|
||||
@ -638,13 +643,13 @@
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fpload,athlon-fadd")
|
||||
(define_insn_reservation "athlon_sseadd_load_k8" 6
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "sseadd")
|
||||
(and (eq_attr "mode" "SF,DF,DI")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_sseadd" 4
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "sseadd")
|
||||
(eq_attr "mode" "SF,DF,DI")))
|
||||
"athlon-direct,athlon-fpsched,athlon-fadd")
|
||||
@ -654,7 +659,7 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fpload2,(athlon-fadd*2)")
|
||||
(define_insn_reservation "athlon_sseaddvector_load_k8" 7
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "sseadd")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
|
||||
@ -663,7 +668,7 @@
|
||||
(eq_attr "type" "sseadd"))
|
||||
"athlon-vector,athlon-fpsched,(athlon-fadd*2)")
|
||||
(define_insn_reservation "athlon_sseaddvector_k8" 5
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "sseadd"))
|
||||
"athlon-double,athlon-fpsched,(athlon-fadd*2)")
|
||||
|
||||
@ -673,28 +678,28 @@
|
||||
|
||||
;; cvtss2sd
|
||||
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
|
||||
(and (eq_attr "cpu" "k8,athlon")
|
||||
(and (eq_attr "cpu" "k8,athlon,generic64")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "athlon_decode" "direct")
|
||||
(and (eq_attr "mode" "DF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fstore")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "athlon_decode" "direct")
|
||||
(eq_attr "mode" "DF"))))
|
||||
"athlon-direct,athlon-fpsched,athlon-fstore")
|
||||
;; cvtps2pd. Model the same way as the other double decoded FP conversions.
|
||||
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
|
||||
(and (eq_attr "cpu" "k8,athlon")
|
||||
(and (eq_attr "cpu" "k8,athlon,generic64")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "athlon_decode" "double")
|
||||
(and (eq_attr "mode" "V2DF,V4SF,TI")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
|
||||
(and (eq_attr "cpu" "k8,athlon")
|
||||
(and (eq_attr "cpu" "k8,athlon,generic64")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "athlon_decode" "double")
|
||||
(eq_attr "mode" "V2DF,V4SF,TI"))))
|
||||
@ -717,7 +722,7 @@
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-vector,athlon-fpload,(athlon-fstore*2)")
|
||||
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "athlon_decode" "double")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
@ -725,7 +730,7 @@
|
||||
"athlon-double,athlon-fploadk8,(athlon-fstore*2)")
|
||||
;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
|
||||
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
|
||||
(and (eq_attr "cpu" "k8,athlon")
|
||||
(and (eq_attr "cpu" "k8,athlon,generic64")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "athlon_decode" "double")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
@ -733,7 +738,7 @@
|
||||
"athlon-double,athlon-fploadk8,athlon-fstore")
|
||||
;; cvtsi2ss reg, reg is doublepath
|
||||
(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
@ -741,7 +746,7 @@
|
||||
"athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
|
||||
;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
|
||||
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
|
||||
(and (eq_attr "cpu" "k8,athlon")
|
||||
(and (eq_attr "cpu" "k8,athlon,generic64")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "athlon_decode" "double")
|
||||
(and (eq_attr "mode" "SF")
|
||||
@ -749,14 +754,14 @@
|
||||
"athlon-double,athlon-fploadk8,(athlon-fstore*3)")
|
||||
;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
|
||||
(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-vector,athlon-fpsched,(athlon-fvector*3)")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
@ -765,7 +770,7 @@
|
||||
;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
|
||||
;; ??? Why is it faster than cvtsd2ss?
|
||||
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
@ -773,7 +778,7 @@
|
||||
"athlon-vector,athlon-fpsched,athlon-fvector*2")
|
||||
;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9
|
||||
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
@ -788,7 +793,7 @@
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-vector,athlon-fpsched,athlon-fvector")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "athlon_decode" "double")
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
@ -803,13 +808,13 @@
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fpload,athlon-fmul")
|
||||
(define_insn_reservation "athlon_ssemul_load_k8" 6
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "ssemul")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fmul")
|
||||
(define_insn_reservation "athlon_ssemul" 4
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "ssemul")
|
||||
(eq_attr "mode" "SF,DF")))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
@ -819,7 +824,7 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fpload2,(athlon-fmul*2)")
|
||||
(define_insn_reservation "athlon_ssemulvector_load_k8" 7
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "ssemul")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
|
||||
@ -828,7 +833,7 @@
|
||||
(eq_attr "type" "ssemul"))
|
||||
"athlon-vector,athlon-fpsched,(athlon-fmul*2)")
|
||||
(define_insn_reservation "athlon_ssemulvector_k8" 5
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "ssemul"))
|
||||
"athlon-double,athlon-fpsched,(athlon-fmul*2)")
|
||||
;; divsd timings. divss is faster
|
||||
@ -839,13 +844,13 @@
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fpload,athlon-fmul*17")
|
||||
(define_insn_reservation "athlon_ssediv_load_k8" 22
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "ssediv")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fmul*17")
|
||||
(define_insn_reservation "athlon_ssediv" 20
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "ssediv")
|
||||
(eq_attr "mode" "SF,DF")))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul*17")
|
||||
@ -855,7 +860,7 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fpload2,athlon-fmul*34")
|
||||
(define_insn_reservation "athlon_ssedivvector_load_k8" 35
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "ssediv")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-double,athlon-fpload2k8,athlon-fmul*34")
|
||||
@ -864,6 +869,6 @@
|
||||
(eq_attr "type" "ssediv"))
|
||||
"athlon-vector,athlon-fmul*34")
|
||||
(define_insn_reservation "athlon_ssedivvector_k8" 39
|
||||
(and (eq_attr "cpu" "k8")
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "ssediv"))
|
||||
"athlon-double,athlon-fmul*34")
|
||||
|
@ -587,6 +587,118 @@ struct processor_costs nocona_cost = {
|
||||
COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
|
||||
};
|
||||
|
||||
/* Generic64 should produce code tuned for Nocona and K8. */
|
||||
static const
|
||||
struct processor_costs generic64_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of an add instruction */
|
||||
/* On all chips taken into consideration lea is 2 cycles and more. With
|
||||
this cost however our current implementation of synth_mult results in
|
||||
use of unnecessary temporary registers causing regression on several
|
||||
SPECfp benchmarks. */
|
||||
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
|
||||
COSTS_N_INSNS (1), /* variable shift costs */
|
||||
COSTS_N_INSNS (1), /* constant shift costs */
|
||||
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
|
||||
COSTS_N_INSNS (4), /* HI */
|
||||
COSTS_N_INSNS (3), /* SI */
|
||||
COSTS_N_INSNS (4), /* DI */
|
||||
COSTS_N_INSNS (2)}, /* other */
|
||||
0, /* cost of multiply per each bit set */
|
||||
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
|
||||
COSTS_N_INSNS (26), /* HI */
|
||||
COSTS_N_INSNS (42), /* SI */
|
||||
COSTS_N_INSNS (74), /* DI */
|
||||
COSTS_N_INSNS (74)}, /* other */
|
||||
COSTS_N_INSNS (1), /* cost of movsx */
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
17, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{4, 4, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{12, 12, 12}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{6, 6, 8}, /* cost of loading integer registers */
|
||||
2, /* cost of moving MMX register */
|
||||
{8, 8}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{8, 8}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{8, 8, 8}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{8, 8, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
5, /* MMX or SSE register to integer */
|
||||
64, /* size of prefetch block */
|
||||
6, /* number of parallel prefetches */
|
||||
/* Benchmarks show large regressions on K8 sixtrack benchmark when this value
|
||||
is increased to perhaps more appropriate value of 5. */
|
||||
3, /* Branch cost */
|
||||
COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
|
||||
COSTS_N_INSNS (8), /* cost of FMUL instruction. */
|
||||
COSTS_N_INSNS (20), /* cost of FDIV instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
};
|
||||
|
||||
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
|
||||
static const
|
||||
struct processor_costs generic32_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of an add instruction */
|
||||
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
|
||||
COSTS_N_INSNS (1), /* variable shift costs */
|
||||
COSTS_N_INSNS (1), /* constant shift costs */
|
||||
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
|
||||
COSTS_N_INSNS (4), /* HI */
|
||||
COSTS_N_INSNS (3), /* SI */
|
||||
COSTS_N_INSNS (4), /* DI */
|
||||
COSTS_N_INSNS (2)}, /* other */
|
||||
0, /* cost of multiply per each bit set */
|
||||
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
|
||||
COSTS_N_INSNS (26), /* HI */
|
||||
COSTS_N_INSNS (42), /* SI */
|
||||
COSTS_N_INSNS (74), /* DI */
|
||||
COSTS_N_INSNS (74)}, /* other */
|
||||
COSTS_N_INSNS (1), /* cost of movsx */
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
17, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{4, 4, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{12, 12, 12}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{6, 6, 8}, /* cost of loading integer registers */
|
||||
2, /* cost of moving MMX register */
|
||||
{8, 8}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{8, 8}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{8, 8, 8}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{8, 8, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
5, /* MMX or SSE register to integer */
|
||||
64, /* size of prefetch block */
|
||||
6, /* number of parallel prefetches */
|
||||
3, /* Branch cost */
|
||||
COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
|
||||
COSTS_N_INSNS (8), /* cost of FMUL instruction. */
|
||||
COSTS_N_INSNS (20), /* cost of FDIV instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
};
|
||||
|
||||
const struct processor_costs *ix86_cost = &pentium_cost;
|
||||
|
||||
/* Processor feature/optimization bitmasks. */
|
||||
@ -600,52 +712,81 @@ const struct processor_costs *ix86_cost = &pentium_cost;
|
||||
#define m_K8 (1<<PROCESSOR_K8)
|
||||
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
|
||||
#define m_NOCONA (1<<PROCESSOR_NOCONA)
|
||||
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
|
||||
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
|
||||
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
|
||||
|
||||
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
|
||||
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
/* Generic instruction choice should be common subset of supported CPUs
|
||||
(PPro/PENT4/NOCONA/Athlon/K8). */
|
||||
|
||||
/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
|
||||
Generic64 seems like good code size tradeoff. We can't enable it for 32bit
|
||||
generic because it is not working well with PPro base chips. */
|
||||
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
|
||||
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_zero_extend_with_and = m_486 | m_PENT;
|
||||
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
|
||||
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
|
||||
const int x86_double_with_add = ~m_386;
|
||||
const int x86_use_bit_test = m_386;
|
||||
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
|
||||
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
|
||||
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_fisttp = m_NOCONA;
|
||||
const int x86_3dnow_a = m_ATHLON_K8;
|
||||
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
/* Branch hints were put in P4 based on simulation result. But
|
||||
after P4 was made, no performance benefit was observed with
|
||||
branch hints. It also increases the code size. As the result,
|
||||
icc never generates branch hints. */
|
||||
const int x86_branch_hints = 0;
|
||||
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
|
||||
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
|
||||
/* We probably ought to watch for partial register stalls on Generic32
|
||||
compilation setting as well. However in current implementation the
|
||||
partial register stalls are not eliminated very well - they can
|
||||
be introduced via subregs synthesized by combine and can happen
|
||||
in caller/callee saving sequences.
|
||||
Because this option pays back little on PPro based chips and is in conflict
|
||||
with partial reg. dependencies used by Athlon/P4 based chips, it is better
|
||||
to leave it off for generic32 for now. */
|
||||
const int x86_partial_reg_stall = m_PPRO;
|
||||
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
|
||||
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
|
||||
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
|
||||
const int x86_use_mov0 = m_K6;
|
||||
const int x86_use_cltd = ~(m_PENT | m_K6);
|
||||
const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
|
||||
const int x86_read_modify_write = ~m_PENT;
|
||||
const int x86_read_modify = ~(m_PENT | m_PPRO);
|
||||
const int x86_split_long_moves = m_PPRO;
|
||||
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
|
||||
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
|
||||
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
|
||||
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
|
||||
const int x86_qimode_math = ~(0);
|
||||
const int x86_promote_qi_regs = 0;
|
||||
/* On PPro this flag is meant to avoid partial register stalls. Just like
|
||||
the x86_partial_reg_stall this option might be considered for Generic32
|
||||
if our scheme for avoiding partial stalls was more effective. */
|
||||
const int x86_himode_math = ~(m_PPRO);
|
||||
const int x86_promote_hi_regs = m_PPRO;
|
||||
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
|
||||
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
|
||||
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
|
||||
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
|
||||
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
|
||||
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
|
||||
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
|
||||
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
|
||||
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
|
||||
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
|
||||
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
|
||||
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
|
||||
const int x86_shift1 = ~m_486;
|
||||
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
|
||||
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
/* In Generic model we have a conflict here between PPro/Pentium4 based chips
|
||||
that treat 128bit SSE registers as single units versus K8 based chips that
|
||||
divide SSE registers to two 64bit halves.
|
||||
x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
|
||||
to allow register renaming on 128bit SSE units, but usually results in one
|
||||
extra microop on 64bit SSE units. Experimental results show that disabling
|
||||
this option on P4 brings over 20% SPECfp regression, while enabling it on
|
||||
K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
|
||||
of moves. */
|
||||
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
|
||||
/* Set for machines where the type and dependencies are resolved on SSE
|
||||
register parts instead of whole registers, so we may maintain just
|
||||
lower part of scalar values in proper format leaving the upper part
|
||||
@ -655,16 +796,17 @@ const int x86_sse_typeless_stores = m_ATHLON_K8;
|
||||
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
|
||||
const int x86_use_ffreep = m_ATHLON_K8;
|
||||
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
|
||||
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
|
||||
|
||||
/* ??? Allowing interunit moves makes it all too easy for the compiler to put
|
||||
integer data in xmm registers. Which results in pretty abysmal code. */
|
||||
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
|
||||
|
||||
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
|
||||
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
|
||||
/* Some CPU cores are not able to predict more than 4 branch instructions in
|
||||
the 16 byte window. */
|
||||
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
|
||||
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
|
||||
const int x86_use_bt = m_ATHLON_K8;
|
||||
/* Compare and exchange was added for 80486. */
|
||||
const int x86_cmpxchg = ~m_386;
|
||||
@ -674,6 +816,7 @@ const int x86_cmpxchg8b = ~(m_386 | m_486);
|
||||
const int x86_cmpxchg16b = m_NOCONA;
|
||||
/* Exchange and add was added for 80486. */
|
||||
const int x86_xadd = ~m_386;
|
||||
const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
|
||||
|
||||
/* In case the average insn count for single function invocation is
|
||||
lower than this constant, emit fast (but longer) prologue and
|
||||
@ -1291,7 +1434,9 @@ override_options (void)
|
||||
{&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
|
||||
{&k8_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
|
||||
{&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
|
||||
{&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
|
||||
};
|
||||
|
||||
static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
|
||||
@ -1359,6 +1504,8 @@ override_options (void)
|
||||
| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
|
||||
{"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
|
||||
| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
|
||||
{"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
|
||||
{"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
|
||||
};
|
||||
|
||||
int const pta_size = ARRAY_SIZE (processor_alias_table);
|
||||
@ -1388,15 +1535,52 @@ override_options (void)
|
||||
flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
|
||||
}
|
||||
|
||||
if (!ix86_tune_string && ix86_arch_string)
|
||||
ix86_tune_string = ix86_arch_string;
|
||||
if (!ix86_tune_string)
|
||||
/* Need to check -mtune=generic first. */
|
||||
if (ix86_tune_string)
|
||||
{
|
||||
ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
|
||||
ix86_tune_defaulted = 1;
|
||||
if (!strcmp (ix86_tune_string, "generic")
|
||||
|| !strcmp (ix86_tune_string, "i686"))
|
||||
{
|
||||
if (TARGET_64BIT)
|
||||
ix86_tune_string = "generic64";
|
||||
else
|
||||
ix86_tune_string = "generic32";
|
||||
}
|
||||
else if (!strncmp (ix86_tune_string, "generic", 7))
|
||||
error ("bad value (%s) for -mtune= switch", ix86_tune_string);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ix86_arch_string)
|
||||
ix86_tune_string = ix86_arch_string;
|
||||
if (!ix86_tune_string)
|
||||
{
|
||||
ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
|
||||
ix86_tune_defaulted = 1;
|
||||
}
|
||||
|
||||
/* ix86_tune_string is set to ix86_arch_string or defaulted. We
|
||||
need to use a sensible tune option. */
|
||||
if (!strcmp (ix86_tune_string, "generic")
|
||||
|| !strcmp (ix86_tune_string, "x86-64")
|
||||
|| !strcmp (ix86_tune_string, "i686"))
|
||||
{
|
||||
if (TARGET_64BIT)
|
||||
ix86_tune_string = "generic64";
|
||||
else
|
||||
ix86_tune_string = "generic32";
|
||||
}
|
||||
}
|
||||
if (!strcmp (ix86_tune_string, "x86-64"))
|
||||
warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
|
||||
"-mtune=generic instead as appropriate.");
|
||||
|
||||
if (!ix86_arch_string)
|
||||
ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
|
||||
if (!strcmp (ix86_arch_string, "generic"))
|
||||
error ("generic CPU can be used only for -mtune= switch");
|
||||
if (!strncmp (ix86_arch_string, "generic", 7))
|
||||
error ("bad value (%s) for -march= switch", ix86_arch_string);
|
||||
|
||||
if (ix86_cmodel_string != 0)
|
||||
{
|
||||
@ -13164,6 +13348,8 @@ ix86_issue_rate (void)
|
||||
case PROCESSOR_ATHLON:
|
||||
case PROCESSOR_K8:
|
||||
case PROCESSOR_NOCONA:
|
||||
case PROCESSOR_GENERIC32:
|
||||
case PROCESSOR_GENERIC64:
|
||||
return 3;
|
||||
|
||||
default:
|
||||
@ -13356,6 +13542,8 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|
||||
|
||||
case PROCESSOR_ATHLON:
|
||||
case PROCESSOR_K8:
|
||||
case PROCESSOR_GENERIC32:
|
||||
case PROCESSOR_GENERIC64:
|
||||
memory = get_attr_memory (insn);
|
||||
|
||||
/* Show ability of reorder buffer to hide latency of load by executing
|
||||
@ -17326,7 +17514,7 @@ ix86_pad_returns (void)
|
||||
static void
|
||||
ix86_reorg (void)
|
||||
{
|
||||
if (TARGET_ATHLON_K8 && optimize && !optimize_size)
|
||||
if (TARGET_PAD_RETURNS && optimize && !optimize_size)
|
||||
ix86_pad_returns ();
|
||||
if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
|
||||
ix86_avoid_jump_misspredicts ();
|
||||
|
@ -93,11 +93,7 @@ extern const struct processor_costs *ix86_cost;
|
||||
/* configure can arrange to make this 2, to force a 486. */
|
||||
|
||||
#ifndef TARGET_CPU_DEFAULT
|
||||
#ifdef TARGET_64BIT_DEFAULT
|
||||
#define TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT_k8
|
||||
#else
|
||||
#define TARGET_CPU_DEFAULT 0
|
||||
#endif
|
||||
#define TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT_generic
|
||||
#endif
|
||||
|
||||
#ifndef TARGET_FPMATH_DEFAULT
|
||||
@ -140,6 +136,9 @@ extern const struct processor_costs *ix86_cost;
|
||||
#define TARGET_K8 (ix86_tune == PROCESSOR_K8)
|
||||
#define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
|
||||
#define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA)
|
||||
#define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
|
||||
#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
|
||||
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
|
||||
|
||||
#define TUNEMASK (1 << ix86_tune)
|
||||
extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
|
||||
@ -163,6 +162,8 @@ extern const int x86_use_ffreep;
|
||||
extern const int x86_inter_unit_moves, x86_schedule;
|
||||
extern const int x86_use_bt;
|
||||
extern const int x86_cmpxchg, x86_cmpxchg8b, x86_cmpxchg16b, x86_xadd;
|
||||
extern const int x86_use_incdec;
|
||||
extern const int x86_pad_returns;
|
||||
extern int x86_prefetch_sse;
|
||||
|
||||
#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
|
||||
@ -217,6 +218,8 @@ extern int x86_prefetch_sse;
|
||||
#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & TUNEMASK)
|
||||
#define TARGET_SCHEDULE (x86_schedule & TUNEMASK)
|
||||
#define TARGET_USE_BT (x86_use_bt & TUNEMASK)
|
||||
#define TARGET_USE_INCDEC (x86_use_incdec & TUNEMASK)
|
||||
#define TARGET_PAD_RETURNS (x86_pad_returns & TUNEMASK)
|
||||
|
||||
#define ASSEMBLER_DIALECT (ix86_asm_dialect)
|
||||
|
||||
@ -464,12 +467,14 @@ extern int x86_prefetch_sse;
|
||||
#define TARGET_CPU_DEFAULT_pentium_m 14
|
||||
#define TARGET_CPU_DEFAULT_prescott 15
|
||||
#define TARGET_CPU_DEFAULT_nocona 16
|
||||
#define TARGET_CPU_DEFAULT_generic 17
|
||||
|
||||
#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
|
||||
"pentiumpro", "pentium2", "pentium3", \
|
||||
"pentium4", "k6", "k6-2", "k6-3",\
|
||||
"athlon", "athlon-4", "k8", \
|
||||
"pentium-m", "prescott", "nocona"}
|
||||
"pentium-m", "prescott", "nocona", \
|
||||
"generic"}
|
||||
|
||||
#ifndef CC1_SPEC
|
||||
#define CC1_SPEC "%(cc1_cpu) "
|
||||
@ -2119,6 +2124,8 @@ enum processor_type
|
||||
PROCESSOR_PENTIUM4,
|
||||
PROCESSOR_K8,
|
||||
PROCESSOR_NOCONA,
|
||||
PROCESSOR_GENERIC32,
|
||||
PROCESSOR_GENERIC64,
|
||||
PROCESSOR_max
|
||||
};
|
||||
|
||||
|
@ -187,7 +187,7 @@
|
||||
|
||||
;; Processor type. This attribute must exactly match the processor_type
|
||||
;; enumeration in i386.h.
|
||||
(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona"
|
||||
(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona,generic32,generic64"
|
||||
(const (symbol_ref "ix86_tune")))
|
||||
|
||||
;; A basic instruction type. Refinements due to arguments to be
|
||||
@ -1511,8 +1511,12 @@
|
||||
(const_string "SI")
|
||||
(and (eq_attr "type" "imov")
|
||||
(and (eq_attr "alternative" "0,1")
|
||||
(ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
|
||||
(const_int 0))))
|
||||
(and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
|
||||
(const_int 0))
|
||||
(and (eq (symbol_ref "optimize_size")
|
||||
(const_int 0))
|
||||
(eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
|
||||
(const_int 0))))))
|
||||
(const_string "SI")
|
||||
;; Avoid partial register stalls when not using QImode arithmetic
|
||||
(and (eq_attr "type" "imov")
|
||||
@ -4145,7 +4149,7 @@
|
||||
[(match_scratch:DF 2 "Y")
|
||||
(set (match_operand:SSEMODEI24 0 "register_operand" "")
|
||||
(fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))]
|
||||
"TARGET_K8 && !optimize_size"
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
|
||||
[(set (match_dup 2) (match_dup 1))
|
||||
(set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
|
||||
"")
|
||||
@ -4154,7 +4158,7 @@
|
||||
[(match_scratch:SF 2 "x")
|
||||
(set (match_operand:SSEMODEI24 0 "register_operand" "")
|
||||
(fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))]
|
||||
"TARGET_K8 && !optimize_size"
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
|
||||
[(set (match_dup 2) (match_dup 1))
|
||||
(set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
|
||||
"")
|
||||
@ -19896,7 +19900,7 @@
|
||||
(mult:DI (match_operand:DI 1 "memory_operand" "")
|
||||
(match_operand:DI 2 "immediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"TARGET_K8 && !optimize_size
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
|
||||
&& (GET_CODE (operands[2]) != CONST_INT
|
||||
|| !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
|
||||
[(set (match_dup 3) (match_dup 1))
|
||||
@ -19910,7 +19914,7 @@
|
||||
(mult:SI (match_operand:SI 1 "memory_operand" "")
|
||||
(match_operand:SI 2 "immediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"TARGET_K8 && !optimize_size
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
|
||||
&& (GET_CODE (operands[2]) != CONST_INT
|
||||
|| !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
|
||||
[(set (match_dup 3) (match_dup 1))
|
||||
@ -19925,7 +19929,7 @@
|
||||
(mult:SI (match_operand:SI 1 "memory_operand" "")
|
||||
(match_operand:SI 2 "immediate_operand" ""))))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"TARGET_K8 && !optimize_size
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
|
||||
&& (GET_CODE (operands[2]) != CONST_INT
|
||||
|| !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
|
||||
[(set (match_dup 3) (match_dup 1))
|
||||
@ -19943,7 +19947,7 @@
|
||||
(match_operand:DI 2 "const_int_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(match_scratch:DI 3 "r")]
|
||||
"TARGET_K8 && !optimize_size
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
|
||||
&& CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')"
|
||||
[(set (match_dup 3) (match_dup 2))
|
||||
(parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
|
||||
@ -19959,7 +19963,7 @@
|
||||
(match_operand:SI 2 "const_int_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(match_scratch:SI 3 "r")]
|
||||
"TARGET_K8 && !optimize_size
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
|
||||
&& CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')"
|
||||
[(set (match_dup 3) (match_dup 2))
|
||||
(parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
|
||||
@ -19975,7 +19979,7 @@
|
||||
(match_operand:HI 2 "immediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(match_scratch:HI 3 "r")]
|
||||
"TARGET_K8 && !optimize_size"
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
|
||||
[(set (match_dup 3) (match_dup 2))
|
||||
(parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
|
@ -137,25 +137,25 @@
|
||||
;; on decoder 0, and say that it takes a little while before the result
|
||||
;; is available.
|
||||
(define_insn_reservation "ppro_complex_insn" 6
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(eq_attr "type" "other,multi,call,callv,str"))
|
||||
"decoder0")
|
||||
|
||||
;; imov with memory operands does not use the integer units.
|
||||
(define_insn_reservation "ppro_imov" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "imov")))
|
||||
"decodern,(p0|p1)")
|
||||
|
||||
(define_insn_reservation "ppro_imov_load" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "imov")))
|
||||
"decodern,p2")
|
||||
|
||||
(define_insn_reservation "ppro_imov_store" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "imov")))
|
||||
"decoder0,p4+p3")
|
||||
@ -163,20 +163,20 @@
|
||||
;; imovx always decodes to one uop, and also doesn't use the integer
|
||||
;; units if it has memory operands.
|
||||
(define_insn_reservation "ppro_imovx" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "imovx")))
|
||||
"decodern,(p0|p1)")
|
||||
|
||||
(define_insn_reservation "ppro_imovx_load" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "imovx")))
|
||||
"decodern,p2")
|
||||
|
||||
;; lea executes on port 0 with latency one and throughput 1.
|
||||
(define_insn_reservation "ppro_lea" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "lea")))
|
||||
"decodern,p0")
|
||||
@ -185,19 +185,19 @@
|
||||
;; The load and store units need to be reserved when memory operands
|
||||
;; are involved.
|
||||
(define_insn_reservation "ppro_shift_rotate" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ishift,ishift1,rotate,rotate1")))
|
||||
"decodern,p0")
|
||||
|
||||
(define_insn_reservation "ppro_shift_rotate_mem" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(eq_attr "type" "ishift,ishift1,rotate,rotate1")))
|
||||
"decoder0,p2+p0,p4+p3")
|
||||
|
||||
(define_insn_reservation "ppro_cld" 2
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(eq_attr "type" "cld"))
|
||||
"decoder0,(p0+p1)*2")
|
||||
|
||||
@ -219,32 +219,32 @@
|
||||
;; results because we can assume these instructions can decode on all
|
||||
;; decoders.
|
||||
(define_insn_reservation "ppro_branch" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ibr")))
|
||||
"decodern,p1")
|
||||
|
||||
;; ??? Indirect branches probably have worse latency than this.
|
||||
(define_insn_reservation "ppro_indirect_branch" 6
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(eq_attr "type" "ibr")))
|
||||
"decoder0,p2+p1")
|
||||
|
||||
(define_insn_reservation "ppro_leave" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(eq_attr "type" "leave"))
|
||||
"decoder0,p2+(p0|p1),(p0|p1)")
|
||||
|
||||
;; imul has throughput one, but latency 4, and can only execute on port 0.
|
||||
(define_insn_reservation "ppro_imul" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "imul")))
|
||||
"decodern,p0")
|
||||
|
||||
(define_insn_reservation "ppro_imul_mem" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(eq_attr "type" "imul")))
|
||||
"decoder0,p2+p0")
|
||||
@ -253,42 +253,42 @@
|
||||
;; QI, HI, and SI have issue latency 12, 21, and 37, respectively.
|
||||
;; These issue latencies are modelled via the ppro_div automaton.
|
||||
(define_insn_reservation "ppro_idiv_QI" 19
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "QI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
"decoder0,(p0+idiv)*2,(p0|p1)+idiv,idiv*9")
|
||||
|
||||
(define_insn_reservation "ppro_idiv_QI_load" 19
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "QI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
"decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*9")
|
||||
|
||||
(define_insn_reservation "ppro_idiv_HI" 23
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "HI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
"decoder0,(p0+idiv)*3,(p0|p1)+idiv,idiv*17")
|
||||
|
||||
(define_insn_reservation "ppro_idiv_HI_load" 23
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "HI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
"decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*18")
|
||||
|
||||
(define_insn_reservation "ppro_idiv_SI" 39
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "SI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
"decoder0,(p0+idiv)*3,(p0|p1)+idiv,idiv*33")
|
||||
|
||||
(define_insn_reservation "ppro_idiv_SI_load" 39
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "SI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
@ -299,85 +299,85 @@
|
||||
;; has throughput "1/cycle (align with FADD)". What do they
|
||||
;; mean and how can we model that?
|
||||
(define_insn_reservation "ppro_fop" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none,unknown")
|
||||
(eq_attr "type" "fop")))
|
||||
"decodern,p0")
|
||||
|
||||
(define_insn_reservation "ppro_fop_load" 5
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "fop")))
|
||||
"decoder0,p2+p0,p0")
|
||||
|
||||
(define_insn_reservation "ppro_fop_store" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "fop")))
|
||||
"decoder0,p0,p0,p0+p4+p3")
|
||||
|
||||
(define_insn_reservation "ppro_fop_both" 5
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "both")
|
||||
(eq_attr "type" "fop")))
|
||||
"decoder0,p2+p0,p0+p4+p3")
|
||||
|
||||
(define_insn_reservation "ppro_fsgn" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(eq_attr "type" "fsgn"))
|
||||
"decodern,p0")
|
||||
|
||||
(define_insn_reservation "ppro_fistp" 5
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(eq_attr "type" "fistp"))
|
||||
"decoder0,p0*2,p4+p3")
|
||||
|
||||
(define_insn_reservation "ppro_fcmov" 2
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(eq_attr "type" "fcmov"))
|
||||
"decoder0,p0*2")
|
||||
|
||||
(define_insn_reservation "ppro_fcmp" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "fcmp")))
|
||||
"decodern,p0")
|
||||
|
||||
(define_insn_reservation "ppro_fcmp_load" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "fcmp")))
|
||||
"decoder0,p2+p0")
|
||||
|
||||
(define_insn_reservation "ppro_fmov" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "fmov")))
|
||||
"decodern,p0")
|
||||
|
||||
(define_insn_reservation "ppro_fmov_load" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "!XF")
|
||||
(eq_attr "type" "fmov"))))
|
||||
"decodern,p2")
|
||||
|
||||
(define_insn_reservation "ppro_fmov_XF_load" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "XF")
|
||||
(eq_attr "type" "fmov"))))
|
||||
"decoder0,(p2+p0)*2")
|
||||
|
||||
(define_insn_reservation "ppro_fmov_store" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "store")
|
||||
(and (eq_attr "mode" "!XF")
|
||||
(eq_attr "type" "fmov"))))
|
||||
"decodern,p0")
|
||||
|
||||
(define_insn_reservation "ppro_fmov_XF_store" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "store")
|
||||
(and (eq_attr "mode" "XF")
|
||||
(eq_attr "type" "fmov"))))
|
||||
@ -386,13 +386,13 @@
|
||||
;; fmul executes on port 0 with latency 5. It has issue latency 2,
|
||||
;; but we don't model this.
|
||||
(define_insn_reservation "ppro_fmul" 5
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "fmul")))
|
||||
"decoder0,p0*2")
|
||||
|
||||
(define_insn_reservation "ppro_fmul_load" 6
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "fmul")))
|
||||
"decoder0,p2+p0,p0")
|
||||
@ -403,42 +403,42 @@
|
||||
;; that. Throughput is equal to latency - 1, which we model using the
|
||||
;; ppro_div automaton.
|
||||
(define_insn_reservation "ppro_fdiv_SF" 18
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "fdiv,fpspc"))))
|
||||
"decodern,p0+fdiv,fdiv*16")
|
||||
|
||||
(define_insn_reservation "ppro_fdiv_SF_load" 19
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "fdiv,fpspc"))))
|
||||
"decoder0,p2+p0+fdiv,fdiv*16")
|
||||
|
||||
(define_insn_reservation "ppro_fdiv_DF" 32
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "DF")
|
||||
(eq_attr "type" "fdiv,fpspc"))))
|
||||
"decodern,p0+fdiv,fdiv*30")
|
||||
|
||||
(define_insn_reservation "ppro_fdiv_DF_load" 33
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "DF")
|
||||
(eq_attr "type" "fdiv,fpspc"))))
|
||||
"decoder0,p2+p0+fdiv,fdiv*30")
|
||||
|
||||
(define_insn_reservation "ppro_fdiv_XF" 38
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "XF")
|
||||
(eq_attr "type" "fdiv,fpspc"))))
|
||||
"decodern,p0+fdiv,fdiv*36")
|
||||
|
||||
(define_insn_reservation "ppro_fdiv_XF_load" 39
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "XF")
|
||||
(eq_attr "type" "fdiv,fpspc"))))
|
||||
@ -456,31 +456,31 @@
|
||||
;; so they behave as "simple" instructions that need no special modelling.
|
||||
;; We only have to model mmxshft and mmxmul.
|
||||
(define_insn_reservation "ppro_mmx_shft" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "mmxshft")))
|
||||
"decodern,p1")
|
||||
|
||||
(define_insn_reservation "ppro_mmx_shft_load" 2
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "mmxshft")))
|
||||
"decoder0,p2+p1")
|
||||
|
||||
(define_insn_reservation "ppro_mmx_mul" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "mmxmul")))
|
||||
"decodern,p0")
|
||||
|
||||
(define_insn_reservation "ppro_mmx_mul_load" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "mmxmul")))
|
||||
"decoder0,p2+p0")
|
||||
|
||||
(define_insn_reservation "ppro_sse_mmxcvt" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(eq_attr "type" "mmxcvt")))
|
||||
"decodern,p1")
|
||||
@ -488,7 +488,7 @@
|
||||
;; FIXME: These are Pentium III only, but we cannot tell here if
|
||||
;; we're generating code for PentiumPro/Pentium II or Pentium III
|
||||
;; (define_insn_reservation "ppro_sse_mmxshft" 2
|
||||
;; (and (eq_attr "cpu" "pentiumpro")
|
||||
;; (and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
;; (and (eq_attr "mode" "DI")
|
||||
;; (eq_attr "type" "mmxshft")))
|
||||
;; "decodern,p0")
|
||||
@ -499,69 +499,69 @@
|
||||
|
||||
;; The sfence instruction.
|
||||
(define_insn_reservation "ppro_sse_sfence" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "unknown")
|
||||
(eq_attr "type" "sse")))
|
||||
"decoder0,p4+p3")
|
||||
|
||||
;; FIXME: This reservation is all wrong when we're scheduling sqrtss.
|
||||
(define_insn_reservation "ppro_sse_SF" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "sse")))
|
||||
"decodern,p0")
|
||||
|
||||
(define_insn_reservation "ppro_sse_add_SF" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "sseadd"))))
|
||||
"decodern,p1")
|
||||
|
||||
(define_insn_reservation "ppro_sse_add_SF_load" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "sseadd"))))
|
||||
"decoder0,p2+p1")
|
||||
|
||||
(define_insn_reservation "ppro_sse_cmp_SF" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "ssecmp"))))
|
||||
"decoder0,p1")
|
||||
|
||||
(define_insn_reservation "ppro_sse_cmp_SF_load" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "ssecmp"))))
|
||||
"decoder0,p2+p1")
|
||||
|
||||
(define_insn_reservation "ppro_sse_comi_SF" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "ssecomi"))))
|
||||
"decodern,p0")
|
||||
|
||||
(define_insn_reservation "ppro_sse_comi_SF_load" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "ssecomi"))))
|
||||
"decoder0,p2+p0")
|
||||
|
||||
(define_insn_reservation "ppro_sse_mul_SF" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "ssemul"))))
|
||||
"decodern,p0")
|
||||
|
||||
(define_insn_reservation "ppro_sse_mul_SF_load" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "ssemul"))))
|
||||
@ -569,109 +569,109 @@
|
||||
|
||||
;; FIXME: ssediv doesn't close p0 for 17 cycles, surely???
|
||||
(define_insn_reservation "ppro_sse_div_SF" 18
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "ssediv"))))
|
||||
"decoder0,p0*17")
|
||||
|
||||
(define_insn_reservation "ppro_sse_div_SF_load" 18
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "ssediv"))))
|
||||
"decoder0,(p2+p0),p0*16")
|
||||
|
||||
(define_insn_reservation "ppro_sse_icvt_SF" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "sseicvt")))
|
||||
"decoder0,(p2+p1)*2")
|
||||
|
||||
(define_insn_reservation "ppro_sse_icvt_SI" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "mode" "SI")
|
||||
(eq_attr "type" "sseicvt")))
|
||||
"decoder0,(p2+p1)")
|
||||
|
||||
(define_insn_reservation "ppro_sse_mov_SF" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
"decoder0,(p0|p1)")
|
||||
|
||||
(define_insn_reservation "ppro_sse_mov_SF_load" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
"decoder0,p2+(p0|p1)")
|
||||
|
||||
(define_insn_reservation "ppro_sse_mov_SF_store" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "store")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
"decoder0,p4+p3")
|
||||
|
||||
(define_insn_reservation "ppro_sse_V4SF" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "sse")))
|
||||
"decoder0,p1*2")
|
||||
|
||||
(define_insn_reservation "ppro_sse_add_V4SF" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "sseadd"))))
|
||||
"decoder0,p1*2")
|
||||
|
||||
(define_insn_reservation "ppro_sse_add_V4SF_load" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "sseadd"))))
|
||||
"decoder0,(p2+p1)*2")
|
||||
|
||||
(define_insn_reservation "ppro_sse_cmp_V4SF" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "ssecmp"))))
|
||||
"decoder0,p1*2")
|
||||
|
||||
(define_insn_reservation "ppro_sse_cmp_V4SF_load" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "ssecmp"))))
|
||||
"decoder0,(p2+p1)*2")
|
||||
|
||||
(define_insn_reservation "ppro_sse_cvt_V4SF" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none,unknown")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "ssecvt"))))
|
||||
"decoder0,p1*2")
|
||||
|
||||
(define_insn_reservation "ppro_sse_cvt_V4SF_other" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "!none,unknown")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "ssecmp"))))
|
||||
"decoder0,p1,p4+p3")
|
||||
|
||||
(define_insn_reservation "ppro_sse_mul_V4SF" 5
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "ssemul"))))
|
||||
"decoder0,p0*2")
|
||||
|
||||
(define_insn_reservation "ppro_sse_mul_V4SF_load" 5
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "ssemul"))))
|
||||
@ -679,49 +679,49 @@
|
||||
|
||||
;; FIXME: p0 really closed this long???
|
||||
(define_insn_reservation "ppro_sse_div_V4SF" 48
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "ssediv"))))
|
||||
"decoder0,p0*34")
|
||||
|
||||
(define_insn_reservation "ppro_sse_div_V4SF_load" 48
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "ssediv"))))
|
||||
"decoder0,(p2+p0)*2,p0*32")
|
||||
|
||||
(define_insn_reservation "ppro_sse_log_V4SF" 2
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "sselog,sselog1"))))
|
||||
"decodern,p1")
|
||||
|
||||
(define_insn_reservation "ppro_sse_log_V4SF_load" 2
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "sselog,sselog1"))))
|
||||
"decoder0,(p2+p1)")
|
||||
|
||||
(define_insn_reservation "ppro_sse_mov_V4SF" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
"decoder0,(p0|p1)*2")
|
||||
|
||||
(define_insn_reservation "ppro_sse_mov_V4SF_load" 2
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
"decoder0,p2*2")
|
||||
|
||||
(define_insn_reservation "ppro_sse_mov_V4SF_store" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "store")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
@ -735,7 +735,7 @@
|
||||
;; reg-reg instructions produce 1 uop so they can be decoded on any of
|
||||
;; the three decoders.
|
||||
(define_insn_reservation "ppro_insn" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "none,unknown")
|
||||
(eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
|
||||
"decodern,(p0|p1)")
|
||||
@ -743,13 +743,13 @@
|
||||
;; read-modify and register-memory instructions have 2 or three uops,
|
||||
;; so they have to be decoded on decoder0.
|
||||
(define_insn_reservation "ppro_insn_load" 3
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
|
||||
"decoder0,p2+(p0|p1)")
|
||||
|
||||
(define_insn_reservation "ppro_insn_store" 1
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
|
||||
"decoder0,(p0|p1),p4+p3")
|
||||
@ -757,7 +757,7 @@
|
||||
;; read-modify-store instructions produce 4 uops so they have to be
|
||||
;; decoded on decoder0 as well.
|
||||
(define_insn_reservation "ppro_insn_both" 4
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(and (eq_attr "cpu" "pentiumpro,generic32")
|
||||
(and (eq_attr "memory" "both")
|
||||
(eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
|
||||
"decoder0,p2+(p0|p1),p4+p3")
|
||||
|
@ -628,7 +628,7 @@
|
||||
{
|
||||
/* On Pentium4, the inc and dec operations causes extra dependency on flag
|
||||
registers, since carry flag is not set. */
|
||||
if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
|
||||
if (!TARGET_USE_INCDEC && !optimize_size)
|
||||
return 0;
|
||||
return op == const1_rtx || op == constm1_rtx;
|
||||
})
|
||||
@ -707,6 +707,11 @@
|
||||
if (GET_CODE (op) != MEM)
|
||||
return 1;
|
||||
|
||||
/* All patterns using aligned_operand on memory operands ends up
|
||||
in promoting memory operand to 64bit and thus causing memory mismatch. */
|
||||
if (TARGET_MEMORY_MISMATCH_STALL && !optimize_size)
|
||||
return 0;
|
||||
|
||||
/* Don't even try to do any aligned optimizations with volatiles. */
|
||||
if (MEM_VOLATILE_P (op))
|
||||
return 0;
|
||||
|
Loading…
x
Reference in New Issue
Block a user