re PR c/10308 ([x86] ICE with -O -fgcse or -O2)

* cfgbuild.c (make_edges):  Do not use next_nonnote_insn when
	looking for fallthru edge.

	* athlon.md (athlon-agu, athlon-store, athlon-fany, athlon-faddmul):
	Fix.
	(athlon-load2, athlon-store2, athlon-fpsched, athlon-fpload,
	athlon-fvector): New.
	(athlon_*): Revisit to match new optimization guide.
	* i386.c (ix86_adjust_cost):  Fix memory operand costs on Athlon/k8
	* i386.md (cvt??2?? patterns): Fix modes.
	(fistp patterns): Set modes.

	Accidentally committed with my earlier reload patch:
	PR c/10308
	* reload.c (find_reloads_address_1): Reload plus at the place of
	index register.

From-SVN: r66037
This commit is contained in:
Jan Hubicka 2003-04-24 17:53:22 +02:00 committed by Jan Hubicka
parent 796621e8c2
commit 26f74aa3f7
6 changed files with 464 additions and 204 deletions

View File

@ -1,3 +1,22 @@
Thu Apr 24 16:55:26 CEST 2003 Jan Hubicka <jh@suse.cz>
* cfgbuild.c (make_edges): Do not use next_nonnote_insn when
looking for fallthru edge.
* athlon.md (athlon-agu, athlon-store, athlon-fany, athlon-faddmul):
Fix.
(athlon-load2, athlon-store2, athlon-fpsched, athlon-fpload,
athlon-fvector): New.
(athlon_*): Revisit to match new optimization guide.
* i386.c (ix86_adjust_cost): Fix memory operand costs on Athlon/k8
* i386.md (cvt??2?? patterns): Fix modes.
(fistp patterns): Set modes.
Accidentally committed with my earlier reload patch:
PR c/10308
* reload.c (find_reloads_address_1): Reload plus at the place of
index register.
2003-04-24 Nathan Sidwell <nathan@codesourcery.com>
New GCOV_TAG_FUNCTION layout

View File

@ -439,15 +439,17 @@ make_edges (label_value_list, min, max, update_p)
}
/* Find out if we can drop through to the next block. */
insn = next_nonnote_insn (insn);
insn = NEXT_INSN (insn);
while (insn
&& GET_CODE (insn) == NOTE
&& NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK)
insn = NEXT_INSN (insn);
if (!insn || (bb->next_bb == EXIT_BLOCK_PTR && force_fallthru))
cached_make_edge (edge_cache, bb, EXIT_BLOCK_PTR, EDGE_FALLTHRU);
else if (bb->next_bb != EXIT_BLOCK_PTR)
{
rtx tmp = bb->next_bb->head;
if (GET_CODE (tmp) == NOTE)
tmp = next_nonnote_insn (tmp);
if (force_fallthru || insn == tmp)
if (force_fallthru || insn == bb->next_bb->head)
cached_make_edge (edge_cache, bb, bb->next_bb, EDGE_FALLTHRU);
}
}

View File

@ -89,62 +89,84 @@
;(define_cpu_unit "athlon-agu1" "athlon_agu")
;(define_cpu_unit "athlon-agu2" "athlon_agu")
;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
(define_reservation "athlon-agu" "nothing,nothing")
(define_reservation "athlon-agu" "nothing")
(define_cpu_unit "athlon-mult" "athlon_mult")
(define_cpu_unit "athlon-load0" "athlon_load")
(define_cpu_unit "athlon-load1" "athlon_load")
(define_reservation "athlon-load" "athlon-agu,
(athlon-load0 | athlon-load1)")
(define_reservation "athlon-store" "nothing")
(athlon-load0 | athlon-load1),nothing")
;; 128bit SSE instructions issue two loads at once
(define_reservation "athlon-load2" "athlon-agu,
(athlon-load0 + athlon-load1),nothing")
(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
;; 128bit SSE instructions issue two stores at once
(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")
;; The FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for Athlon and 11 for K8
;; Compensate the difference for Athlon because it results in significantly
;; smaller automata.
(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
;; The floating point loads.
(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")
;; The three fp units are fully pipelined with latency of 3
(define_cpu_unit "athlon-fadd" "athlon_fp")
(define_cpu_unit "athlon-fmul" "athlon_fp")
(define_cpu_unit "athlon-fstore" "athlon_fp")
(define_reservation "athlon-fany" "(athlon-fadd | athlon-fmul | athlon-fstore)")
(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
(define_reservation "athlon-faddmul" "(athlon-fmul | athlon-fadd)")
;; Vector operations usually consume many of the pipes.
(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
;; Jump instructions are executed in the branch unit completely transparent to us
(define_insn_reservation "athlon_branch" 0
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "ibr"))
"athlon-direct")
"athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_call" 0
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "call,callv"))
"athlon-vector")
"athlon-vector,athlon-ieu")
;; Latency of push operation is 3 cycles, but ESP value is available
;; earlier
(define_insn_reservation "athlon_push" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "push"))
"athlon-direct,nothing,athlon-store")
"athlon-direct,athlon-agu,athlon-store")
(define_insn_reservation "athlon_pop" 4
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "pop"))
"athlon-vector,athlon-ieu,athlon-load")
"athlon-vector,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_pop_k8" 3
(and (eq_attr "cpu" "k8")
(eq_attr "type" "pop"))
"athlon-double,athlon-ieu,athlon-load")
"athlon-double,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "leave"))
"athlon-vector,athlon-load")
"athlon-vector,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave_k8" 3
(and (eq_attr "cpu" "k8")
(eq_attr "type" "leave"))
"athlon-double,athlon-load")
"athlon-double,(athlon-ieu+athlon-load)")
;; Lea executes in AGU unit with 2 cycles latency.
(define_insn_reservation "athlon_lea" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "lea"))
"athlon-direct,athlon-agu")
"athlon-direct,athlon-agu,nothing")
;; Mul executes in special multiplier unit attached to IEU0
(define_insn_reservation "athlon_imul" 5
@ -180,21 +202,31 @@
(and (eq_attr "type" "imul")
(eq_attr "memory" "load,both")))
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
(define_insn_reservation "athlon_idiv" 42
;; Idiv cannot execute in parallel with other instructions. Dealing with it
;; as with a short latency vector instruction is a good approximation that
;; keeps the scheduler from trying too hard to hide its latency by overlap
;; with other instructions.
;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
;; of the other code
(define_insn_reservation "athlon_idiv" 6
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "idiv")
(eq_attr "memory" "none,unknown")))
"athlon-vector,athlon-ieu*42")
(define_insn_reservation "athlon_idiv_mem" 45
"athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
(define_insn_reservation "athlon_idiv_mem" 9
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "idiv")
(eq_attr "memory" "load,both")))
"athlon-vector,athlon-load,athlon-ieu*42")
(define_insn_reservation "athlon_str" 15
"athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
;; The parallelism of string instructions is not documented. Model it the same
;; way as idiv to create smaller automata. This probably does not matter much.
(define_insn_reservation "athlon_str" 6
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "str")
(eq_attr "memory" "load,both,store")))
"athlon-vector,athlon-load,athlon-ieu*10")
"athlon-vector,athlon-load,athlon-ieu0*6")
(define_insn_reservation "athlon_idirect" 1
(and (eq_attr "cpu" "athlon,k8")
@ -235,28 +267,31 @@
(and (eq_attr "athlon_decode" "direct")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "both"))))
"athlon-direct,athlon-load,athlon-ieu,
"athlon-direct,athlon-load,
athlon-ieu,athlon-store,
athlon-store")
(define_insn_reservation "athlon_ivector_both" 6
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "both"))))
"athlon-vector,athlon-load,athlon-ieu,athlon-ieu,
"athlon-vector,athlon-load,
athlon-ieu,
athlon-ieu,
athlon-store")
(define_insn_reservation "athlon_idirect_store" 1
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "athlon_decode" "direct")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "store"))))
"athlon-direct,athlon-ieu,
"athlon-direct,(athlon-ieu+athlon-agu),
athlon-store")
(define_insn_reservation "athlon_ivector_store" 2
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "store"))))
"athlon-vector,athlon-ieu,athlon-ieu,
"athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
athlon-store")
;; Athlon floating point unit
@ -265,401 +300,570 @@
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "load")
(eq_attr "mode" "XF"))))
"athlon-vector,athlon-fany")
"athlon-vector,athlon-fpload2,athlon-fvector*9")
(define_insn_reservation "athlon_fldxf_k8" 13
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "load")
(eq_attr "mode" "XF"))))
"athlon-vector,athlon-fany")
(define_insn_reservation "athlon_fld" 6
"athlon-vector,athlon-fpload2k8,athlon-fvector*9")
;; Assume superforwarding to take place so effective latency of fany op is 0.
(define_insn_reservation "athlon_fld" 0
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fany,nothing,athlon-load")
(define_insn_reservation "athlon_fld_k8" 4
"athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_fld_k8" 2
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fany,athlon-load")
"athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_fstxf" 10
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "store,both")
(eq_attr "mode" "XF"))))
"athlon-vector,athlon-fstore")
"athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
(define_insn_reservation "athlon_fstxf_k8" 8
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "store,both")
(eq_attr "mode" "XF"))))
"athlon-vector,athlon-fstore")
"athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
(define_insn_reservation "athlon_fst" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store,both")))
"athlon-direct,athlon-fstore,nothing,athlon-store")
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fst_k8" 2
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store,both")))
"athlon-direct,athlon-fstore,athlon-store")
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fist" 4
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fistp"))
"athlon-direct,athlon-fstore,nothing")
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fmov" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fmov"))
"athlon-direct,athlon-faddmul")
(define_insn_reservation "athlon_fadd_load" 7
"athlon-direct,athlon-fpsched,athlon-faddmul")
(define_insn_reservation "athlon_fadd_load" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fop")
(eq_attr "memory" "load")))
"athlon-direct,athlon-load,athlon-fadd")
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fadd_load_k8" 6
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fop")
(eq_attr "memory" "load")))
"athlon-direct,athlon-load,athlon-fadd")
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fadd" 4
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fop"))
"athlon-direct,athlon-fadd")
(define_insn_reservation "athlon_fmul_load" 7
"athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_fmul_load" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmul")
(eq_attr "memory" "load")))
"athlon-direct,athlon-load,athlon-fmul")
"athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fmul_load_k8" 6
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmul")
(eq_attr "memory" "load")))
"athlon-direct,athlon-load,athlon-fmul")
"athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fmul" 4
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fmul"))
"athlon-direct,athlon-fmul")
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fsgn" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fsgn"))
"athlon-direct,athlon-fmul")
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load" 24
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "load")))
"athlon-direct,athlon-load,athlon-fmul")
"athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load_k8" 13
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "load")))
"athlon-direct,athlon-load,athlon-fmul")
"athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fdiv" 24
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "fdiv"))
"athlon-direct,athlon-fmul")
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_k8" 11
(and (eq_attr "cpu" "k8")
(eq_attr "type" "fdiv"))
"athlon-direct,athlon-fmul")
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fpspc_load" 103
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "fpspc")
(eq_attr "memory" "load")))
"athlon-vector,athlon-load,athlon-fmul")
"athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fpspc" 100
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fpspc"))
"athlon-vector,athlon-fmul")
(define_insn_reservation "athlon_fcmov_load" 10
"athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load" 7
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fcmov")
(eq_attr "memory" "load")))
"athlon-vector,athlon-load,athlon-fmul")
"athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fcmov" 7
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "fcmov"))
"athlon-vector,athlon-fmul")
"athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load_k8" 17
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fcmov")
(eq_attr "memory" "load")))
"athlon-vector,athlon-load,athlon-fmul")
"athlon-vector,athlon-fploadk8,athlon-fvector")
(define_insn_reservation "athlon_fcmov_k8" 15
(and (eq_attr "cpu" "k8")
(eq_attr "type" "fcmov"))
"athlon-vector,athlon-fmul")
(define_insn_reservation "athlon_fcomi_load" 6
"athlon-vector,athlon-fpsched,athlon-fvector")
;; fcomi is vector decoded but uses only one pipe.
(define_insn_reservation "athlon_fcomi_load" 3
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fcmp")
(and (eq_attr "athlon_decode" "vector")
(eq_attr "memory" "load"))))
"athlon-vector,athlon-load,athlon-fadd")
"athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcomi_load_k8" 5
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fcmp")
(and (eq_attr "athlon_decode" "vector")
(eq_attr "memory" "load"))))
"athlon-vector,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcomi" 3
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "athlon_decode" "vector")
(eq_attr "type" "fcmp")))
"athlon-vector,athlon-fadd")
(define_insn_reservation "athlon_fcom_load" 5
(and (eq_attr "cpu" "athlon,k8")
"athlon-vector,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_fcom_load" 2
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fcmp")
(eq_attr "memory" "load")))
"athlon-direct,athlon-load,athlon-fadd")
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcom_load_k8" 4
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fcmp")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcom" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fcmp"))
"athlon-direct,athlon-fadd")
(define_insn_reservation "athlon_fxch" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fxch"))
"athlon-direct,athlon-fany")
"athlon-direct,athlon-fpsched,athlon-fadd")
;; Never seen by the scheduler because we still don't do post reg-stack
;; scheduling.
;(define_insn_reservation "athlon_fxch" 2
; (and (eq_attr "cpu" "athlon,k8")
; (eq_attr "type" "fxch"))
; "athlon-direct,athlon-fpsched,athlon-fany")
;; Athlon handle MMX operations in the FPU unit with shorter latencies
(define_insn_reservation "athlon_movlpd_load" 4
(and (eq_attr "cpu" "athlon,k8")
(define_insn_reservation "athlon_movlpd_load" 0
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssemov")
(match_operand:DF 1 "memory_operand" "")))
"athlon-direct,athlon-load")
(define_insn_reservation "athlon_movaps_load" 4
(and (eq_attr "cpu" "athlon,k8")
"athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_movlpd_load_k8" 2
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssemov")
(match_operand:DF 1 "memory_operand" "")))
"athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_movaps_load_k8" 2
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssemov")
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "load"))))
"athlon-double,athlon-load")
(define_insn_reservation "athlon_movss_load" 3
(and (eq_attr "cpu" "athlon,k8")
"athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
(define_insn_reservation "athlon_movaps_load" 0
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssemov")
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "load"))))
"athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
(define_insn_reservation "athlon_movss_load" 1
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssemov")
(and (eq_attr "mode" "SF,DI")
(eq_attr "memory" "load"))))
"athlon-double,athlon-load")
(define_insn_reservation "athlon_mmxsseld" 4
(and (eq_attr "cpu" "athlon,k8")
"athlon-vector,athlon-fpload,(athlon-fany*2)")
(define_insn_reservation "athlon_movss_load_k8" 1
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssemov")
(and (eq_attr "mode" "SF,DI")
(eq_attr "memory" "load"))))
"athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
(define_insn_reservation "athlon_mmxsseld" 0
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "mmxmov,ssemov")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fany,athlon-load")
"athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_mmxsseld_k8" 2
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "mmxmov,ssemov")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_mmxssest" 3
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "mmxmov,ssemov")
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "store,both"))))
"athlon-double,athlon-store")
(define_insn_reservation "athlon_mmxssest_k8" 2
"athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
(define_insn_reservation "athlon_mmxssest_k8" 3
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "mmxmov,ssemov")
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "store,both"))))
"athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
(define_insn_reservation "athlon_mmxssest_short" 2
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "mmxmov,ssemov")
(eq_attr "memory" "store,both")))
"athlon-direct,athlon-store")
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_movaps" 2
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssemov")
(eq_attr "mode" "V4SF,V2DF")))
"athlon-double,athlon-faddmul,athlon-faddmul")
(eq_attr "mode" "V4SF,V2DF,TI")))
"athlon-double,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
(define_insn_reservation "athlon_movaps_k8" 2
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssemov")
(eq_attr "mode" "V4SF,V2DF,TI")))
"athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
(define_insn_reservation "athlon_mmxssemov" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "mmxmov,ssemov"))
"athlon-direct,athlon-faddmul")
(define_insn_reservation "athlon_mmxmul_load" 6
"athlon-direct,athlon-fpsched,athlon-faddmul")
(define_insn_reservation "athlon_mmxmul_load" 4
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "mmxmul")
(eq_attr "memory" "load")))
"athlon-direct,athlon-load,athlon-fmul")
"athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_mmxmul" 3
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "mmxmul"))
"athlon-direct,athlon-fmul")
(define_insn_reservation "athlon_mmx_load" 5
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_mmx_load" 3
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "unit" "mmx")
(eq_attr "memory" "load")))
"athlon-direct,athlon-load,athlon-faddmul")
"athlon-direct,athlon-fpload,athlon-faddmul")
(define_insn_reservation "athlon_mmx" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "unit" "mmx"))
"athlon-direct,athlon-faddmul")
"athlon-direct,athlon-fpsched,athlon-faddmul")
;; SSE operations are handled by the i387 unit as well. The latency
;; is same as for i387 operations for scalar operations
(define_insn_reservation "athlon_sselog_load" 6
(define_insn_reservation "athlon_sselog_load" 3
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "sselog")
(eq_attr "memory" "load")))
"athlon-vector,athlon-load,athlon-fmul")
"athlon-vector,athlon-fpload2,(athlon-fmul*2)")
(define_insn_reservation "athlon_sselog_load_k8" 5
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "sselog")
(eq_attr "memory" "load")))
"athlon-double,athlon-load,athlon-fmul")
"athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
(define_insn_reservation "athlon_sselog" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "sselog"))
"athlon-vector,athlon-fmul")
"athlon-vector,athlon-fpsched,athlon-fmul*2")
(define_insn_reservation "athlon_sselog_k8" 3
(and (eq_attr "cpu" "k8")
(eq_attr "type" "sselog"))
"athlon-double,athlon-fmul")
(define_insn_reservation "athlon_ssecmp_load" 5
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssecmp,ssecomi")
(and (eq_attr "mode" "SF,DF")
"athlon-double,athlon-fpsched,athlon-fmul")
;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
(define_insn_reservation "athlon_ssecmp_load" 2
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssecmp")
(and (eq_attr "mode" "SF,DF,DI")
(eq_attr "memory" "load"))))
"athlon-vector,athlon-load,athlon-fadd")
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_ssecmp_load_k8" 4
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssecmp")
(and (eq_attr "mode" "SF,DF,DI,TI")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecmp" 2
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssecmp,ssecomi")
(eq_attr "mode" "SF,DF")))
"athlon-direct,athlon-fadd")
(define_insn_reservation "athlon_ssecmpvector_load" 6
(and (eq_attr "type" "ssecmp")
(eq_attr "mode" "SF,DF,DI,TI")))
"athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_ssecmpvector_load" 3
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssecmp,ssecomi")
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
"athlon-vector,athlon-fadd")
"athlon-vector,athlon-fpload2,(athlon-fadd*2)")
(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssecmp,ssecomi")
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
"athlon-double,athlon-fadd")
"athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
(define_insn_reservation "athlon_ssecmpvector" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "ssecmp,ssecomi"))
"athlon-vector,athlon-fadd")
(eq_attr "type" "ssecmp"))
"athlon-vector,athlon-fpsched,(athlon-fadd*2)")
(define_insn_reservation "athlon_ssecmpvector_k8" 3
(and (eq_attr "cpu" "k8")
(eq_attr "type" "ssecmp,ssecomi"))
"athlon-double,athlon-fadd")
(define_insn_reservation "athlon_sseadd_load" 7
(eq_attr "type" "ssecmp"))
"athlon-double,athlon-fpsched,(athlon-fadd*2)")
(define_insn_reservation "athlon_ssecomi_load" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssecomi")
(eq_attr "memory" "load")))
"athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_ssecomi_load_k8" 6
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssecomi")
(eq_attr "memory" "load")))
"athlon-vector,athlon-fploadk8,athlon-fadd")
;; Non-load form of the ssecomi reservations.  Match type "ssecomi" here, as
;; athlon_ssecomi_load and athlon_ssecomi_load_k8 do: insns of type "ssecmp"
;; are already claimed by the earlier athlon_ssecmp* reservations, so testing
;; "ssecmp" would leave this entry unreachable and leave ssecomi insns without
;; a memory load with no reservation at all.
(define_insn_reservation "athlon_ssecomi" 4
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "ssecomi"))
"athlon-vector,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_sseadd_load" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "sseadd")
(and (eq_attr "mode" "SF,DF")
(and (eq_attr "mode" "SF,DF,DI")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-load,athlon-fadd")
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_sseadd_load_k8" 6
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "sseadd")
(and (eq_attr "mode" "SF,DF")
(and (eq_attr "mode" "SF,DF,DI")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-load,athlon-fadd")
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_sseadd" 4
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "sseadd")
(eq_attr "mode" "SF,DF")))
"athlon-direct,athlon-fadd")
(define_insn_reservation "athlon_sseaddvector_load" 8
(eq_attr "mode" "SF,DF,DI")))
"athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_sseaddvector_load" 5
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "sseadd")
(eq_attr "memory" "load")))
"athlon-vector,athlon-load,athlon-fadd")
"athlon-vector,athlon-fpload2,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_load_k8" 7
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "sseadd")
(eq_attr "memory" "load")))
"athlon-vector,athlon-load,athlon-fadd")
"athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector" 5
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "sseadd"))
"athlon-vector,athlon-fadd")
(define_insn_reservation "athlon_sseaddvector_k8" 4
"athlon-vector,athlon-fpsched,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_k8" 5
(and (eq_attr "cpu" "k8")
(eq_attr "type" "sseadd"))
"athlon-vector,athlon-fadd")
(define_insn_reservation "athlon_ssecvt_load" 5
(and (eq_attr "cpu" "athlon")
"athlon-double,athlon-fpsched,(athlon-fadd*2)")
;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately. Assume that the mode is always set to the
;; destination one and athlon_decode is set to the K8 versions.
;; cvtss2sd
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
(and (eq_attr "cpu" "k8,athlon")
(and (eq_attr "type" "ssecvt")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-load,athlon-fadd")
(define_insn_reservation "athlon_ssecvt_load_k8" 4
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssecvt")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-load,athlon-fadd")
(define_insn_reservation "athlon_ssecvt" 2
(and (eq_attr "athlon_decode" "direct")
(and (eq_attr "mode" "DF")
(eq_attr "memory" "load")))))
"athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssecvt")
(eq_attr "mode" "SF,DF")))
"athlon-direct,athlon-fadd")
(define_insn_reservation "athlon_ssecvtvector_load" 6
(and (eq_attr "cpu" "athlon")
(and (eq_attr "athlon_decode" "direct")
(eq_attr "mode" "DF"))))
"athlon-direct,athlon-fpsched,athlon-fstore")
;; cvtps2pd. Model same way the other double decoded FP conversions.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
(and (eq_attr "cpu" "k8,athlon")
(and (eq_attr "type" "ssecvt")
(eq_attr "memory" "load")))
"athlon-vector,athlon-load,athlon-fadd")
(define_insn_reservation "athlon_ssecvtvector_load_k8" 5
(and (eq_attr "cpu" "k8")
(and (eq_attr "athlon_decode" "double")
(and (eq_attr "mode" "V2DF,V4SF,TI")
(eq_attr "memory" "load")))))
"athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
(and (eq_attr "cpu" "k8,athlon")
(and (eq_attr "type" "ssecvt")
(eq_attr "memory" "load")))
"athlon-vector,athlon-load,athlon-fadd")
(define_insn_reservation "athlon_ssecvtvector" 5
(and (eq_attr "athlon_decode" "double")
(eq_attr "mode" "V2DF,V4SF,TI"))))
"athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath)
;; cvtsi2sd has throughput 1 and is executed in store unit with latency of 6
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "sseicvt")
(and (eq_attr "athlon_decode" "direct")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load")))))
"athlon-direct,athlon-fploadk8,athlon-fstore")
;; cvtsi2ss mem, reg is doublepath
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "ssecvt"))
"athlon-vector,athlon-fadd")
(define_insn_reservation "athlon_ssecvtvector_k8" 3
(and (eq_attr "type" "sseicvt")
(and (eq_attr "athlon_decode" "double")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load")))))
"athlon-vector,athlon-fpload,(athlon-fstore*2)")
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
(and (eq_attr "cpu" "k8")
(eq_attr "type" "ssecvt"))
"athlon-vector,athlon-fadd")
(define_insn_reservation "athlon_ssemul_load" 7
(and (eq_attr "type" "sseicvt")
(and (eq_attr "athlon_decode" "double")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load")))))
"athlon-double,athlon-fploadk8,(athlon-fstore*2)")
;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
(and (eq_attr "cpu" "k8,athlon")
(and (eq_attr "type" "sseicvt")
(and (eq_attr "athlon_decode" "double")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "none")))))
"athlon-double,athlon-fploadk8,athlon-fstore")
;; cvtsi2ss reg, reg is doublepath
(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "sseicvt")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "none")))))
"athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
(and (eq_attr "cpu" "k8,athlon")
(and (eq_attr "type" "ssecvt")
(and (eq_attr "athlon_decode" "double")
(and (eq_attr "mode" "SF")
(eq_attr "memory" "load")))))
"athlon-double,athlon-fploadk8,(athlon-fstore*3)")
;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssecvt")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "mode" "SF")
(eq_attr "memory" "none")))))
"athlon-vector,athlon-fpsched,(athlon-fvector*3)")
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssecvt")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "load")))))
"athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
;; ??? Why is it faster than cvtsd2ss?
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssecvt")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "none")))))
"athlon-vector,athlon-fpsched,athlon-fvector*2")
;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "sseicvt")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "mode" "SI,DI")
(eq_attr "memory" "load")))))
"athlon-vector,athlon-fploadk8,athlon-fvector")
;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9
;; Athlon-only variant: matches "sseicvt" insns in SI or DI mode with no
;; memory operand whose athlon_decode attribute is "double"; the
;; reservation nevertheless starts with athlon-vector.
(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "sseicvt")
(and (eq_attr "athlon_decode" "double")
(and (eq_attr "mode" "SI,DI")
(eq_attr "memory" "none")))))
"athlon-vector,athlon-fpsched,athlon-fvector")
;; k8-only variant: same condition as the athlon reservation for
;; "sseicvt" insns in SI or DI mode with no memory operand and
;; athlon_decode "double", with default latency 9, but the reservation
;; uses athlon-double and athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "sseicvt")
(and (eq_attr "athlon_decode" "double")
(and (eq_attr "mode" "SI,DI")
(eq_attr "memory" "none")))))
"athlon-double,athlon-fpsched,athlon-fstore")
(define_insn_reservation "athlon_ssemul_load" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssemul")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-load,athlon-fmul")
"athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_ssemul_load_k8" 6
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssemul")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-load,athlon-fmul")
"athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_ssemul" 4
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssemul")
(eq_attr "mode" "SF,DF")))
"athlon-direct,athlon-fmul")
(define_insn_reservation "athlon_ssemulvector_load" 8
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_ssemulvector_load" 5
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
"athlon-vector,athlon-load,athlon-fmul")
"athlon-vector,athlon-fpload2,(athlon-fmul*2)")
(define_insn_reservation "athlon_ssemulvector_load_k8" 7
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
"athlon-double,athlon-load,athlon-fmul")
"athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
(define_insn_reservation "athlon_ssemulvector" 5
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "ssemul"))
"athlon-vector,athlon-fmul")
"athlon-vector,athlon-fpsched,(athlon-fmul*2)")
(define_insn_reservation "athlon_ssemulvector_k8" 5
(and (eq_attr "cpu" "k8")
(eq_attr "type" "ssemul"))
"athlon-double,athlon-fmul")
(define_insn_reservation "athlon_ssediv_load" 19
"athlon-double,athlon-fpsched,(athlon-fmul*2)")
;; divsd timings. divss is faster
(define_insn_reservation "athlon_ssediv_load" 20
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssediv")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-load,athlon-fmul")
(define_insn_reservation "athlon_ssediv_load_k8" 18
"athlon-direct,athlon-fpload,athlon-fmul*17")
(define_insn_reservation "athlon_ssediv_load_k8" 22
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssediv")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-load,athlon-fmul")
(define_insn_reservation "athlon_ssediv" 16
"athlon-direct,athlon-fploadk8,athlon-fmul*17")
(define_insn_reservation "athlon_ssediv" 20
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssediv")
(eq_attr "mode" "SF,DF")))
"athlon-direct,athlon-fmul")
(define_insn_reservation "athlon_ssedivvector_load" 32
"athlon-direct,athlon-fpsched,athlon-fmul*17")
(define_insn_reservation "athlon_ssedivvector_load" 39
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load")))
"athlon-vector,athlon-load,athlon-fmul")
"athlon-vector,athlon-fpload2,athlon-fmul*34")
(define_insn_reservation "athlon_ssedivvector_load_k8" 35
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load")))
"athlon-vector,athlon-load,athlon-fmul")
(define_insn_reservation "athlon_ssedivvector" 29
"athlon-double,athlon-fpload2k8,athlon-fmul*34")
(define_insn_reservation "athlon_ssedivvector" 39
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "ssediv"))
"athlon-vector,athlon-fmul")
(define_insn_reservation "athlon_ssedivvector_k8" 33
"athlon-vector,athlon-fmul*34")
(define_insn_reservation "athlon_ssedivvector_k8" 39
(and (eq_attr "cpu" "k8")
(eq_attr "type" "ssediv"))
"athlon-vector,athlon-fmul")
"athlon-double,athlon-fmul*34")

View File

@ -12125,13 +12125,21 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
&& !ix86_agi_dependant (insn, dep_insn, insn_type))
{
/* Claim moves to take one cycle, as core can issue one load
at time and the next load can start cycle later. */
if (dep_insn_type == TYPE_IMOV
|| dep_insn_type == TYPE_FMOV)
cost = 0;
else if (cost >= 3)
cost -= 3;
enum attr_unit unit = get_attr_unit (insn);
int loadcost = 3;
/* Because of the difference between the length of integer and
floating unit pipeline preparation stages, the memory operands
for floating point are cheaper.
??? For Athlon the difference is most probably 2. */
if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
loadcost = 3;
else
loadcost = TARGET_ATHLON ? 2 : 0;
if (cost >= loadcost)
cost -= loadcost;
else
cost = 0;
}

View File

@ -3990,7 +3990,7 @@
}
[(set_attr "type" "ssecvt,ssecvt,fmov")
(set_attr "athlon_decode" "vector,double,*")
(set_attr "mode" "DF,DF,SF")])
(set_attr "mode" "SF,SF,SF")])
(define_insn "*truncdfsf2_2_nooverlap"
[(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
@ -4037,7 +4037,7 @@
"cvtsd2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "athlon_decode" "vector,double")
(set_attr "mode" "DF")])
(set_attr "mode" "SF")])
(define_insn "*truncdfsf2_sse_only_nooverlap"
[(set (match_operand:SF 0 "register_operand" "=&Y")
@ -4443,7 +4443,8 @@
}
DONE;
}
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "DI")])
(define_insn "fix_truncdi_nomemory"
[(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
@ -4455,7 +4456,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
"#"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "DI")])
(define_insn "fix_truncdi_memory"
[(set (match_operand:DI 0 "memory_operand" "=m")
@ -4466,7 +4468,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
"* operands[5] = operands[4]; return output_fix_trunc (insn, operands);"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "DI")])
(define_split
[(set (match_operand:DI 0 "register_operand" "")
@ -4504,6 +4507,7 @@
"TARGET_64BIT && TARGET_SSE"
"cvttss2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "double,vector")])
;; Avoid vector decoded form of the instruction.
@ -4522,6 +4526,7 @@
"TARGET_64BIT && TARGET_SSE2"
"cvttsd2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt,sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,vector")])
;; Avoid vector decoded form of the instruction.
@ -4605,7 +4610,8 @@
}
DONE;
}
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "SI")])
(define_insn "fix_truncsi_nomemory"
[(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r")
@ -4616,7 +4622,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"#"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "SI")])
(define_insn "fix_truncsi_memory"
[(set (match_operand:SI 0 "memory_operand" "=m")
@ -4626,7 +4633,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"* return output_fix_trunc (insn, operands);"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "SI")])
;; When SSE available, it is always faster to use it!
(define_insn "fix_truncsfsi_sse"
@ -4635,6 +4643,7 @@
"TARGET_SSE"
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,vector")])
;; Avoid vector decoded form of the instruction.
@ -4653,6 +4662,7 @@
"TARGET_SSE2"
"cvttsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,vector")])
;; Avoid vector decoded form of the instruction.
@ -4743,7 +4753,8 @@
}
DONE;
}
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "HI")])
(define_insn "fix_trunchi_nomemory"
[(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r")
@ -4754,7 +4765,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"#"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "HI")])
(define_insn "fix_trunchi_memory"
[(set (match_operand:HI 0 "memory_operand" "=m")
@ -4764,7 +4776,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"* return output_fix_trunc (insn, operands);"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "HI")])
(define_split
[(set (match_operand:HI 0 "memory_operand" "")
@ -20262,7 +20275,7 @@
"cvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "SF")])
(set_attr "mode" "SI")])
(define_insn "cvtss2siq"
[(set (match_operand:DI 0 "register_operand" "=r,r")
@ -20273,7 +20286,7 @@
"cvtss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "SF")])
(set_attr "mode" "DI")])
(define_insn "cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r,r")
@ -21891,22 +21904,24 @@
;; Conversions between SI and DF
(define_insn "cvtsd2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
[(set (match_operand:SI 0 "register_operand" "=r,r")
(fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m")
(parallel [(const_int 0)]))))]
"TARGET_SSE2"
"cvtsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "SI")])
(define_insn "cvtsd2siq"
[(set (match_operand:DI 0 "register_operand" "=r")
(fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
(fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m")
(parallel [(const_int 0)]))))]
"TARGET_SSE2 && TARGET_64BIT"
"cvtsd2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SI")])
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "DI")])
(define_insn "cvttsd2si"
[(set (match_operand:SI 0 "register_operand" "=r,r")

View File

@ -4618,6 +4618,18 @@ parameter very large effectively disables garbage collection. Setting
this parameter and @option{ggc-min-expand} to zero causes a full
collection to occur at every opportunity.
@item reorder-blocks-duplicate
@itemx reorder-blocks-duplicate-feedback
Used by basic block reordering pass to decide whether to use unconditional
branch or duplicate the code on its destination. Code is duplicated when its
estimated size is smaller than this value multiplied by the estimated size of
unconditional jump in the hot spots of the program.
The @option{reorder-blocks-duplicate-feedback} is used only when profile
feedback is available and may be set to higher values than
@option{reorder-blocks-duplicate} since information about the hot spots is more
accurate.
@end table
@end table