mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-24 08:50:26 +08:00
Implementation of the pipeline description for Bulldozer (bdver1)
* gcc/config/i386/bdver1.md: New file. * gcc/config/i386/i386.md (include "bdver1.md"): Invoke the pipeline description for bdver1. (x86_sahf_1): Add "bdver1_decode" attribute. (*cmpfp_i_mixed): Likewise. (*cmpfp_i_sse): Likewise. (*cmpfp_i_i387): Likewise. (*cmpfp_iu_mixed): Likewise. (*cmpfp_iu_sse): Likewise. (*cmpfp_iu_387): Likewise. (*swap<mode>,*swap<mode>_1): Likewise. (fixuns_trunc<mode>hi2): Likewise. (fix_trunc<mode>si_sse): Likewise. (x86_fnstcw_1): Likewise. (x86_fldcw_1): Likewise. (*floatsi<mode>2_vector_mixed_with_temp): Likewise. (*floatsi<mode>2_vector_mixed): Likewise. (*float<SSEMODEI24:mode><MODEF:mode>2_mixed_with_temp): Likewise. (*float<SSEMODEI24:mode><MODEF:mode>2_mixed_interunit): Likewise. (*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit): Likewise. (*floatsi<mode>2_vector_sse_with_temp): Likewise. (*floatsi<mode>2_vector_sse): Likewise. (*float<SSEMODEI24:mode><MODEF:mode>2_sse_with_temp): Likewise. (*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit): Likewise. (*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit): Likewise. (*mul<mode>3_1): Likewise. (*mulsi3_1_zext): Likewise. (*mulhi3_1): Likewise. (*mulqi3_1): Likewise. (*<u>mul<mode><dwi>3_1): Likewise. (*<u>mulqihi3_1): Likewise. (*<s>muldi3_highpart_1): Likewise. (*<s>mulsi3_highpart_1): Likewise. (*<s>mulsi3_highpart_zext): Likewise. (x86_64_shld): Likewise. (x86_shld): Likewise. (x86_64_shrd): Likewise. (x86_shrd): Likewise. (sqrtxf2): Likewise. (sqrt_extend<mode>xf2_i387): Likewise. (*sqrt<mode>2_sse): Likewise. * gcc/config/i386/sse.md (sse_cvtsi2ss): Add "bdver1_decode" attribute. (sse_cvtsi2ssq): Likewise. (sse_cvtss2si): Likewise. (sse_cvtss2si_2): Likewise. (sse_cvtss2siq): Likewise. (sse_cvtss2siq_2): Likewise. (sse_cvttss2si): Likewise. (sse_cvttss2siq): Likewise. (sse2_cvtpi2pd): Likewise. (sse2_cvttpd2pi): Likewise. (sse2_cvtsi2sd): Likewise. (sse2_cvtsi2sdq): Likewise. (sse2_cvtsd2si): Likewise. (sse2_cvtsd2si_2): Likewise. (sse2_cvtsd2siq): Likewise. 
(sse2_cvtsd2siq_2): Likewise. (sse2_cvttsd2si): Likewise. (sse2_cvttsd2siq): Likewise. (*sse2_cvtpd2dq): Likewise. (*sse2_cvttpd2dq): Likewise. (sse2_cvtsd2ss): Likewise. (sse2_cvtss2sd): Likewise. (*sse2_cvtpd2ps): Likewise. (sse2_cvtps2pd): Likewise. From-SVN: r165853
This commit is contained in:
parent
07db7d35a9
commit
6a08ffca63
@ -1,3 +1,71 @@
|
||||
2010-10-22 Changpeng Fang <changpeng.fang@amd.com>
|
||||
|
||||
* gcc/config/i386/bdver1.md: New file.
|
||||
* gcc/config/i386/i386.md (include "bdver1.md"): Invoke the
|
||||
pipeline description for bdver1.
|
||||
(x86_sahf_1): Add "bdver1_decode" attribute.
|
||||
(*cmpfp_i_mixed): Likewise.
|
||||
(*cmpfp_i_sse): Likewise.
|
||||
(*cmpfp_i_i387): Likewise.
|
||||
(*cmpfp_iu_mixed): Likewise.
|
||||
(*cmpfp_iu_sse): Likewise.
|
||||
(*cmpfp_iu_387): Likewise.
|
||||
(*swap<mode>,*swap<mode>_1): Likewise.
|
||||
(fixuns_trunc<mode>hi2): Likewise.
|
||||
(fix_trunc<mode>si_sse): Likewise.
|
||||
(x86_fnstcw_1): Likewise.
|
||||
(x86_fldcw_1): Likewise.
|
||||
(*floatsi<mode>2_vector_mixed_with_temp): Likewise.
|
||||
(*floatsi<mode>2_vector_mixed): Likewise.
|
||||
(*float<SSEMODEI24:mode><MODEF:mode>2_mixed_with_temp): Likewise.
|
||||
(*float<SSEMODEI24:mode><MODEF:mode>2_mixed_interunit): Likewise.
|
||||
(*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit): Likewise.
|
||||
(*floatsi<mode>2_vector_sse_with_temp): Likewise.
|
||||
(*floatsi<mode>2_vector_sse): Likewise.
|
||||
(*float<SSEMODEI24:mode><MODEF:mode>2_sse_with_temp): Likewise.
|
||||
(*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit): Likewise.
|
||||
(*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit): Likewise.
|
||||
(*mul<mode>3_1): Likewise.
|
||||
(*mulsi3_1_zext): Likewise.
|
||||
(*mulhi3_1): Likewise.
|
||||
(*mulqi3_1): Likewise.
|
||||
(*<u>mul<mode><dwi>3_1): Likewise.
|
||||
(*<u>mulqihi3_1): Likewise.
|
||||
(*<s>muldi3_highpart_1): Likewise.
|
||||
(*<s>mulsi3_highpart_1): Likewise.
|
||||
(*<s>mulsi3_highpart_zext): Likewise.
|
||||
(x86_64_shld): Likewise.
|
||||
(x86_shld): Likewise.
|
||||
(x86_64_shrd): Likewise.
|
||||
(x86_shrd): Likewise.
|
||||
(sqrtxf2): Likewise.
|
||||
(sqrt_extend<mode>xf2_i387): Likewise.
|
||||
(*sqrt<mode>2_sse): Likewise.
|
||||
* gcc/config/i386/sse.md (sse_cvtsi2ss): Add "bdver1_decode" attribute.
|
||||
(sse_cvtsi2ssq): Likewise.
|
||||
(sse_cvtss2si): Likewise.
|
||||
(sse_cvtss2si_2): Likewise.
|
||||
(sse_cvtss2siq): Likewise.
|
||||
(sse_cvtss2siq_2): Likewise.
|
||||
(sse_cvttss2si): Likewise.
|
||||
(sse_cvttss2siq): Likewise.
|
||||
(sse2_cvtpi2pd): Likewise.
|
||||
(sse2_cvttpd2pi): Likewise.
|
||||
(sse2_cvtsi2sd): Likewise.
|
||||
(sse2_cvtsi2sdq): Likewise.
|
||||
(sse2_cvtsd2si): Likewise.
|
||||
(sse2_cvtsd2si_2): Likewise.
|
||||
(sse2_cvtsd2siq): Likewise.
|
||||
(sse2_cvtsd2siq_2): Likewise.
|
||||
(sse2_cvttsd2si): Likewise.
|
||||
(sse2_cvttsd2siq): Likewise.
|
||||
(*sse2_cvtpd2dq): Likewise.
|
||||
(*sse2_cvttpd2dq): Likewise.
|
||||
(sse2_cvtsd2ss): Likewise.
|
||||
(sse2_cvtss2sd): Likewise.
|
||||
(*sse2_cvtpd2ps): Likewise.
|
||||
(sse2_cvtps2pd): Likewise.
|
||||
|
||||
2010-10-22 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR middle-end/46137
|
||||
|
796
gcc/config/i386/bdver1.md
Normal file
796
gcc/config/i386/bdver1.md
Normal file
@ -0,0 +1,796 @@
|
||||
;; Copyright (C) 2010, Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GCC.
|
||||
;;
|
||||
;; GCC is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation; either version 3, or (at your option)
|
||||
;; any later version.
|
||||
;;
|
||||
;; GCC is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GCC; see the file COPYING3. If not see
|
||||
;; <http://www.gnu.org/licenses/>.
|
||||
;;
|
||||
;; AMD bdver1 Scheduling
|
||||
;;
|
||||
;; The bdver1 contains four pipelined FP units, two integer units and
|
||||
;; two address generation units.
|
||||
;;
|
||||
;; The predecode logic is determining boundaries of instructions in the 64
|
||||
;; byte cache line. So the cache line straddling problem of K6 might be an issue
|
||||
;; here as well, but it is not noted in the documentation.
|
||||
;;
|
||||
;; Three DirectPath instruction decoders and only one VectorPath decoder
|
||||
;; are available. They can decode three DirectPath instructions or one
|
||||
;; VectorPath instruction per cycle.
|
||||
;;
|
||||
;; The load/store queue unit is not attached to the schedulers but
|
||||
;; communicates with all the execution units separately instead.
|
||||
|
||||
|
||||
(define_attr "bdver1_decode" "direct,vector,double"
|
||||
(const_string "direct"))
|
||||
|
||||
(define_automaton "bdver1,bdver1_int,bdver1_load,bdver1_mult,bdver1_fp")
|
||||
|
||||
(define_cpu_unit "bdver1-decode0" "bdver1")
|
||||
(define_cpu_unit "bdver1-decode1" "bdver1")
|
||||
(define_cpu_unit "bdver1-decode2" "bdver1")
|
||||
(define_cpu_unit "bdver1-decodev" "bdver1")
|
||||
|
||||
;; Model the fact that a double decoded instruction may take 2 cycles
|
||||
;; to decode when decoder2 and decoder0 in the next cycle
|
||||
;; are used (this is needed to allow throughput of 1.5 double decoded
|
||||
;; instructions per cycle).
|
||||
;;
|
||||
;; In order to avoid dependence between reservation of decoder
|
||||
;; and other units, we model decoder as two stage fully pipelined unit
|
||||
;; and only double decoded instruction may occupy unit in the first cycle.
|
||||
;; With this scheme, however, two double instructions can be issued in cycle 0.
|
||||
;;
|
||||
;; Avoid this by using presence set requiring decoder0 to be allocated
|
||||
;; too. Vector decoded instructions then can't be issued when modeled
|
||||
;; as consuming decoder0+decoder1+decoder2.
|
||||
;; We solve that with a specialized vector decoder unit and an exclusion set.
|
||||
(presence_set "bdver1-decode2" "bdver1-decode0")
|
||||
(exclusion_set "bdver1-decodev" "bdver1-decode0,bdver1-decode1,bdver1-decode2")
|
||||
|
||||
(define_reservation "bdver1-vector" "nothing,bdver1-decodev")
|
||||
(define_reservation "bdver1-direct1" "nothing,bdver1-decode1")
|
||||
(define_reservation "bdver1-direct" "nothing,
|
||||
(bdver1-decode0 | bdver1-decode1
|
||||
| bdver1-decode2)")
|
||||
;; Double instructions behave like two direct instructions.
|
||||
(define_reservation "bdver1-double" "((bdver1-decode2,bdver1-decode0)
|
||||
| (nothing,(bdver1-decode0 + bdver1-decode1))
|
||||
| (nothing,(bdver1-decode1 + bdver1-decode2)))")
|
||||
|
||||
|
||||
(define_cpu_unit "bdver1-ieu0" "bdver1_int")
|
||||
(define_cpu_unit "bdver1-ieu1" "bdver1_int")
|
||||
(define_reservation "bdver1-ieu" "(bdver1-ieu0 | bdver1-ieu1)")
|
||||
|
||||
(define_cpu_unit "bdver1-agu0" "bdver1_int")
|
||||
(define_cpu_unit "bdver1-agu1" "bdver1_int")
|
||||
(define_reservation "bdver1-agu" "(bdver1-agu0 | bdver1-agu1)")
|
||||
|
||||
(define_cpu_unit "bdver1-mult" "bdver1_mult")
|
||||
|
||||
(define_cpu_unit "bdver1-load0" "bdver1_load")
|
||||
(define_cpu_unit "bdver1-load1" "bdver1_load")
|
||||
(define_reservation "bdver1-load" "bdver1-agu,
|
||||
(bdver1-load0 | bdver1-load1),nothing")
|
||||
;; 128bit SSE instructions issue two loads at once.
|
||||
(define_reservation "bdver1-load2" "bdver1-agu,
|
||||
(bdver1-load0 + bdver1-load1),nothing")
|
||||
|
||||
(define_reservation "bdver1-store" "(bdver1-load0 | bdver1-load1)")
|
||||
;; 128bit SSE instructions issue two stores at once.
|
||||
(define_reservation "bdver1-store2" "(bdver1-load0 + bdver1-load1)")
|
||||
|
||||
;; The FP operations start to execute at stage 12 in the pipeline, while
|
||||
;; integer operations start to execute at stage 9 for athlon and 11 for K8
|
||||
;; Compensate the difference for athlon because it results in significantly
|
||||
;; smaller automata.
|
||||
;; NOTE: the above information was just copied from athlon.md, and was not
|
||||
;; actually verified for bdver1.
|
||||
(define_reservation "bdver1-fpsched" "nothing,nothing,nothing")
|
||||
;; The floating point loads.
|
||||
(define_reservation "bdver1-fpload" "(bdver1-fpsched + bdver1-load)")
|
||||
(define_reservation "bdver1-fpload2" "(bdver1-fpsched + bdver1-load2)")
|
||||
|
||||
;; Four FP units.
|
||||
(define_cpu_unit "bdver1-ffma0" "bdver1_fp")
|
||||
(define_cpu_unit "bdver1-ffma1" "bdver1_fp")
|
||||
(define_cpu_unit "bdver1-fmal0" "bdver1_fp")
|
||||
(define_cpu_unit "bdver1-fmal1" "bdver1_fp")
|
||||
|
||||
(define_reservation "bdver1-ffma" "(bdver1-ffma0 | bdver1-ffma1)")
|
||||
(define_reservation "bdver1-fcvt" "bdver1-ffma0")
|
||||
(define_reservation "bdver1-fmma" "bdver1-ffma0")
|
||||
(define_reservation "bdver1-fxbar" "bdver1-ffma1")
|
||||
(define_reservation "bdver1-fmal" "(bdver1-fmal0 | bdver1-fmal1)")
|
||||
(define_reservation "bdver1-fsto" "bdver1-fmal1")
|
||||
|
||||
;; Vector operations usually consume many of the pipes.
|
||||
(define_reservation "bdver1-fvector" "(bdver1-ffma0 + bdver1-ffma1
|
||||
+ bdver1-fmal0 + bdver1-fmal1)")
|
||||
|
||||
;; Jump instructions are executed in the branch unit completely transparent to us.
|
||||
(define_insn_reservation "bdver1_call" 0
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "call,callv"))
|
||||
"bdver1-double,bdver1-agu,bdver1-ieu")
|
||||
;; PUSH mem is double path.
|
||||
(define_insn_reservation "bdver1_push" 1
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "push"))
|
||||
"bdver1-direct,bdver1-agu,bdver1-store")
|
||||
;; POP r16/mem are double path.
|
||||
(define_insn_reservation "bdver1_pop" 1
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "pop"))
|
||||
"bdver1-direct,(bdver1-ieu+bdver1-load)")
|
||||
;; No latency info for LEAVE so far; assume the same as amdfam10.
|
||||
(define_insn_reservation "bdver1_leave" 3
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "leave"))
|
||||
"bdver1-vector,(bdver1-ieu+bdver1-load)")
|
||||
;; LEA executes in AGU unit with 1 cycle latency on BDVER1.
|
||||
(define_insn_reservation "bdver1_lea" 1
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "lea"))
|
||||
"bdver1-direct,bdver1-agu,nothing")
|
||||
|
||||
;; MUL executes in special multiplier unit attached to IEU1.
|
||||
(define_insn_reservation "bdver1_imul_DI" 6
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "imul")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"bdver1-direct1,bdver1-ieu1,bdver1-mult,nothing,bdver1-ieu1")
|
||||
(define_insn_reservation "bdver1_imul" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "imul")
|
||||
(eq_attr "memory" "none,unknown")))
|
||||
"bdver1-direct1,bdver1-ieu1,bdver1-mult,bdver1-ieu1")
|
||||
(define_insn_reservation "bdver1_imul_mem_DI" 10
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "imul")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(eq_attr "memory" "load,both"))))
|
||||
"bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,nothing,bdver1-ieu")
|
||||
(define_insn_reservation "bdver1_imul_mem" 8
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "imul")
|
||||
(eq_attr "memory" "load,both")))
|
||||
"bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,bdver1-ieu")
|
||||
|
||||
;; IDIV cannot execute in parallel with other instructions. Dealing with it
|
||||
;; as with short latency vector instruction is good approximation avoiding
|
||||
;; scheduler from trying too hard to hide its latency by overlap with
|
||||
;; other instructions.
|
||||
;; ??? Experiments show that the IDIV can overlap with roughly 6 cycles
|
||||
;; of the other code.
|
||||
(define_insn_reservation "bdver1_idiv" 6
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(eq_attr "memory" "none,unknown")))
|
||||
"bdver1-vector,(bdver1-ieu0*6+(bdver1-fpsched,bdver1-fvector))")
|
||||
|
||||
(define_insn_reservation "bdver1_idiv_mem" 10
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(eq_attr "memory" "load,both")))
|
||||
"bdver1-vector,((bdver1-load,bdver1-ieu0*6)+(bdver1-fpsched,bdver1-fvector))")
|
||||
|
||||
;; The parallelism of string instructions is not documented. Model it the same way
|
||||
;; as IDIV to create smaller automata. This probably does not matter much.
|
||||
;; Using the same heuristics for bdver1 as amdfam10 and K8 with IDIV.
|
||||
(define_insn_reservation "bdver1_str" 6
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "str")
|
||||
(eq_attr "memory" "load,both,store")))
|
||||
"bdver1-vector,bdver1-load,bdver1-ieu0*6")
|
||||
|
||||
;; Integer instructions.
|
||||
(define_insn_reservation "bdver1_idirect" 1
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "bdver1_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"bdver1-direct,bdver1-ieu")
|
||||
(define_insn_reservation "bdver1_ivector" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "bdver1_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"bdver1-vector,bdver1-ieu,bdver1-ieu")
|
||||
(define_insn_reservation "bdver1_idirect_loadmov" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "imov")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-direct,bdver1-load")
|
||||
(define_insn_reservation "bdver1_idirect_load" 5
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "bdver1_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "load"))))
|
||||
"bdver1-direct,bdver1-load,bdver1-ieu")
|
||||
(define_insn_reservation "bdver1_ivector_load" 6
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "bdver1_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "load"))))
|
||||
"bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu")
|
||||
(define_insn_reservation "bdver1_idirect_movstore" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "imov")
|
||||
(eq_attr "memory" "store")))
|
||||
"bdver1-direct,bdver1-agu,bdver1-store")
|
||||
(define_insn_reservation "bdver1_idirect_both" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "bdver1_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "both"))))
|
||||
"bdver1-direct,bdver1-load,
|
||||
bdver1-ieu,bdver1-store,
|
||||
bdver1-store")
|
||||
(define_insn_reservation "bdver1_ivector_both" 5
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "bdver1_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "both"))))
|
||||
"bdver1-vector,bdver1-load,
|
||||
bdver1-ieu,
|
||||
bdver1-ieu,
|
||||
bdver1-store")
|
||||
(define_insn_reservation "bdver1_idirect_store" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "bdver1_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "store"))))
|
||||
"bdver1-direct,(bdver1-ieu+bdver1-agu),
|
||||
bdver1-store")
|
||||
(define_insn_reservation "bdver1_ivector_store" 5
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "bdver1_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "store"))))
|
||||
"bdver1-vector,(bdver1-ieu+bdver1-agu),bdver1-ieu,
|
||||
bdver1-store")
|
||||
|
||||
;; BDVER1 floating point units.
|
||||
(define_insn_reservation "bdver1_fldxf" 13
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "mode" "XF"))))
|
||||
"bdver1-vector,bdver1-fpload2,bdver1-fvector*9")
|
||||
(define_insn_reservation "bdver1_fld" 5
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-direct,bdver1-fpload,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_fstxf" 8
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(and (eq_attr "memory" "store,both")
|
||||
(eq_attr "mode" "XF"))))
|
||||
"bdver1-vector,(bdver1-fpsched+bdver1-agu),(bdver1-store2+(bdver1-fvector*6))")
|
||||
(define_insn_reservation "bdver1_fst" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "store,both")))
|
||||
"bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
|
||||
(define_insn_reservation "bdver1_fist" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "fistp,fisttp"))
|
||||
"bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
|
||||
(define_insn_reservation "bdver1_fmov_bdver1" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "fmov"))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_fadd_load" 10
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "fop")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-direct,bdver1-fpload,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_fadd" 6
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "fop"))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_fmul_load" 10
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "fmul")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-double,bdver1-fpload,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_fmul" 6
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "fmul"))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_fsgn" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "fsgn"))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_fdiv_load" 46
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "fdiv")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-direct,bdver1-fpload,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_fdiv" 42
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "fdiv"))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_fpspc_load" 103
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "fpspc")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-vector,bdver1-fpload,bdver1-fvector")
|
||||
;; FP special instructions (type "fpspc") that do not load from memory.
;; The load form is handled by bdver1_fpspc_load, which precedes this
;; entry.  This entry previously repeated that entry's condition
;; (memory "load") and its bdver1-fpload reservation, so it never applied
;; to any insn that bdver1_fpspc_load did not already cover, and
;; register-operand fpspc insns had no bdver1 reservation.  Match on the
;; type alone and go through bdver1-fpsched, as the other non-load
;; entries of the *_load / non-load pairs in this file do.
(define_insn_reservation "bdver1_fpspc" 100
			 (and (eq_attr "cpu" "bdver1")
			      (eq_attr "type" "fpspc"))
			 "bdver1-vector,bdver1-fpsched,bdver1-fvector")
|
||||
(define_insn_reservation "bdver1_fcmov_load" 17
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "fcmov")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-vector,bdver1-fpload,bdver1-fvector")
|
||||
(define_insn_reservation "bdver1_fcmov" 15
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "fcmov"))
|
||||
"bdver1-vector,bdver1-fpsched,bdver1-fvector")
|
||||
(define_insn_reservation "bdver1_fcomi_load" 6
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(and (eq_attr "bdver1_decode" "double")
|
||||
(eq_attr "memory" "load"))))
|
||||
"bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
|
||||
(define_insn_reservation "bdver1_fcomi" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "bdver1_decode" "double")
|
||||
(eq_attr "type" "fcmp")))
|
||||
"bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
|
||||
(define_insn_reservation "bdver1_fcom_load" 6
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-direct,bdver1-fpload,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_fcom" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "fcmp"))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_fxch" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "fxch"))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-ffma")
|
||||
|
||||
;; SSE loads.
|
||||
(define_insn_reservation "bdver1_ssevector_avx128_unaligned_load" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "prefix" "vex")
|
||||
(and (eq_attr "movu" "1")
|
||||
(and (eq_attr "mode" "V4SF,V2DF")
|
||||
(eq_attr "memory" "load"))))))
|
||||
"bdver1-direct,bdver1-fpload")
|
||||
(define_insn_reservation "bdver1_ssevector_avx256_unaligned_load" 5
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "movu" "1")
|
||||
(and (eq_attr "mode" "V8SF,V4DF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"bdver1-double,bdver1-fpload")
|
||||
(define_insn_reservation "bdver1_ssevector_sse128_unaligned_load" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "movu" "1")
|
||||
(and (eq_attr "mode" "V4SF,V2DF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"bdver1-direct,bdver1-fpload,bdver1-fmal")
|
||||
(define_insn_reservation "bdver1_ssevector_avx128_load" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "prefix" "vex")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(eq_attr "memory" "load")))))
|
||||
"bdver1-direct,bdver1-fpload,bdver1-fmal")
|
||||
(define_insn_reservation "bdver1_ssevector_avx256_load" 5
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(eq_attr "memory" "load"))))
|
||||
"bdver1-double,bdver1-fpload,bdver1-fmal")
|
||||
(define_insn_reservation "bdver1_ssevector_sse128_load" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(eq_attr "memory" "load"))))
|
||||
"bdver1-direct,bdver1-fpload")
|
||||
(define_insn_reservation "bdver1_ssescalar_movq_load" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(eq_attr "memory" "load"))))
|
||||
"bdver1-direct,bdver1-fpload,bdver1-fmal")
|
||||
(define_insn_reservation "bdver1_ssescalar_vmovss_load" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "prefix" "vex")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"bdver1-direct,bdver1-fpload")
|
||||
(define_insn_reservation "bdver1_ssescalar_sse128_load" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "load"))))
|
||||
"bdver1-direct,bdver1-fpload, bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_mmxsse_load" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "mmxmov,ssemov")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-direct,bdver1-fpload, bdver1-fmal")
|
||||
|
||||
;; SSE stores.
|
||||
(define_insn_reservation "bdver1_sse_store_avx256" 5
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(eq_attr "memory" "store,both"))))
|
||||
"bdver1-double,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
|
||||
(define_insn_reservation "bdver1_sse_store" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(eq_attr "memory" "store,both"))))
|
||||
"bdver1-direct,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
|
||||
(define_insn_reservation "bdver1_mmxsse_store_short" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "mmxmov,ssemov")
|
||||
(eq_attr "memory" "store,both")))
|
||||
"bdver1-direct,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
|
||||
|
||||
;; Register moves.
|
||||
(define_insn_reservation "bdver1_ssevector_avx256" 3
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(eq_attr "memory" "none"))))
|
||||
"bdver1-double,bdver1-fpsched,bdver1-fmal")
|
||||
(define_insn_reservation "bdver1_movss_movsd" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "none"))))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_mmxssemov" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "mmxmov,ssemov")
|
||||
(eq_attr "memory" "none")))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-fmal")
|
||||
;; SSE logical operations.
|
||||
(define_insn_reservation "bdver1_sselog_load_256" 7
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "sselog,sselog1")
|
||||
(and (eq_attr "mode" "V8SF")
|
||||
(eq_attr "memory" "load"))))
|
||||
"bdver1-double,bdver1-fpload,bdver1-fmal")
|
||||
(define_insn_reservation "bdver1_sselog_256" 3
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "sselog,sselog1")
|
||||
(eq_attr "mode" "V8SF")))
|
||||
"bdver1-double,bdver1-fpsched,bdver1-fmal")
|
||||
(define_insn_reservation "bdver1_sselog_load" 6
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "sselog,sselog1")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-direct,bdver1-fpload,bdver1-fxbar")
|
||||
(define_insn_reservation "bdver1_sselog" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "sselog,sselog1"))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-fxbar")
|
||||
|
||||
;; PCMP actually executes in FMAL.
|
||||
(define_insn_reservation "bdver1_ssecmp_load" 6
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssecmp")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-direct,bdver1-fpload,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_ssecmp" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "ssecmp"))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-ffma")
|
||||
(define_insn_reservation "bdver1_ssecomi_load" 6
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssecomi")
|
||||
(eq_attr "memory" "load")))
|
||||
"bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
|
||||
(define_insn_reservation "bdver1_ssecomi" 2
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(eq_attr "type" "ssecomi"))
|
||||
"bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
|
||||
|
||||
;; Conversions behave very irregularly and the scheduling is critical here.
|
||||
;; Take each instruction separately.
|
||||
|
||||
;; 256 bit conversion.
|
||||
(define_insn_reservation "bdver1_vcvtX2Y_avx256_load" 8
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "memory" "load")
|
||||
(ior (ior (match_operand:V4DF 0 "register_operand")
|
||||
(ior (match_operand:V8SF 0 "register_operand")
|
||||
(match_operand:V8SI 0 "register_operand")))
|
||||
(ior (match_operand:V4DF 1 "nonimmediate_operand")
|
||||
(ior (match_operand:V8SF 1 "nonimmediate_operand")
|
||||
(match_operand:V8SI 1 "nonimmediate_operand")))))))
|
||||
"bdver1-vector,bdver1-fpload,bdver1-fvector")
|
||||
(define_insn_reservation "bdver1_vcvtX2Y_avx256" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "memory" "none")
|
||||
(ior (ior (match_operand:V4DF 0 "register_operand")
|
||||
(ior (match_operand:V8SF 0 "register_operand")
|
||||
(match_operand:V8SI 0 "register_operand")))
|
||||
(ior (match_operand:V4DF 1 "nonimmediate_operand")
|
||||
(ior (match_operand:V8SF 1 "nonimmediate_operand")
|
||||
(match_operand:V8SI 1 "nonimmediate_operand")))))))
|
||||
"bdver1-vector,bdver1-fpsched,bdver1-fvector")
|
||||
;; CVTSS2SD, CVTSD2SS.
|
||||
(define_insn_reservation "bdver1_ssecvt_cvtss2sd_load" 8
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "load"))))
|
||||
"bdver1-direct,bdver1-fpload,bdver1-fcvt")
|
||||
(define_insn_reservation "bdver1_ssecvt_cvtss2sd" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "none"))))
|
||||
"bdver1-direct,bdver1-fpsched,bdver1-fcvt")
|
||||
;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.
|
||||
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd_load" 8
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "load"))))
|
||||
"bdver1-direct,bdver1-fpload,bdver1-fcvt")
|
||||
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "none"))))
|
||||
"bdver1-double,bdver1-fpsched,(nothing | bdver1-fcvt)")
|
||||
;; CVTPD2PS.
|
||||
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps_load" 8
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (match_operand:V4SF 0 "register_operand")
|
||||
(match_operand:V2DF 1 "nonimmediate_operand")))))
|
||||
"bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
|
||||
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps" 4
|
||||
(and (eq_attr "cpu" "bdver1")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (match_operand:V4SF 0 "register_operand")
|
||||
(match_operand:V2DF 1 "nonimmediate_operand")))))
|
||||
"bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
|
||||
;; CVTPI2PS, CVTDQ2PS.
|
||||
;; Load form of ssecvt insns producing a V4SF register from a V2SI or V4SI
;; operand 1; default latency 8.
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps_load" 8
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
|
||||
;; Register form of ssecvt insns producing a V4SF register from a V2SI or
;; V4SI operand 1; default latency 4.
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps" 4
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
|
||||
;; CVTDQ2PD.
|
||||
;; Load form of ssecvt insns producing a V2DF register from a V4SI
;; operand 1; default latency 8.
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd_load" 8
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
|
||||
;; Register form of ssecvt insns producing a V2DF register from a V4SI
;; operand 1; default latency 4.
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd" 4
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
|
||||
;; CVTPS2PD, CVTPI2PD.
|
||||
;; Load form of ssecvt insns producing a V2DF register from a V2SI or V4SF
;; operand 1; default latency 6.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd_load" 6
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
|
||||
;; Register form of ssecvt insns producing a V2DF register from a V2SI or
;; V4SF operand 1; default latency 2.
;; The memory test must be "none": with "load" this reservation duplicated
;; the condition of bdver1_ssecvt_cvtps2pd_load, so it could never be
;; selected and the register form was left without a reservation.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd" 2
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
|
||||
;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ.
|
||||
;; Load form (memory attribute "load") of the SI/DI-mode sseicvt insns;
;; default latency 8.
(define_insn_reservation "bdver1_ssecvt_cvtsX2si_load" 8
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fsto)")
|
||||
;; Register form (memory attribute "none") of the SI/DI-mode sseicvt insns;
;; default latency 4.
(define_insn_reservation "bdver1_ssecvt_cvtsX2si" 4
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fsto)")
|
||||
;; CVTPD2PI, CVTTPD2PI.
|
||||
;; Load form of ssecvt insns taking a V2DF operand 1 and producing a V2SI
;; register; default latency 8.
(define_insn_reservation "bdver1_ssecvt_cvtpd2pi_load" 8
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
|
||||
;; Register form of ssecvt insns taking a V2DF operand 1 and producing a
;; V2SI register; default latency 4.
(define_insn_reservation "bdver1_ssecvt_cvtpd2pi" 4
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
|
||||
;; CVTPD2DQ, CVTTPD2DQ.
|
||||
;; Load form of ssecvt insns taking a V2DF operand 1 and producing a V4SI
;; register; default latency 6.
(define_insn_reservation "bdver1_ssecvt_cvtpd2dq_load" 6
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
|
||||
;; Register form of ssecvt insns taking a V2DF operand 1 and producing a
;; V4SI register; default latency 2.
(define_insn_reservation "bdver1_ssecvt_cvtpd2dq" 2
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
|
||||
;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
|
||||
;; Load form of ssecvt insns taking a V4SF operand 1 and producing a V2SI
;; or V4SI register; default latency 8.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi_load" 8
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
|
||||
;; Register form of ssecvt insns taking a V4SF operand 1 and producing a
;; V2SI or V4SI register; default latency 4.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi" 4
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
|
||||
|
||||
;; SSE MUL, ADD, and MULADD.
|
||||
;; Load form of ssemul/sseadd/ssemuladd insns in V8SF or V4DF mode;
;; default latency 11.
(define_insn_reservation "bdver1_ssemuladd_load_256" 11
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-ffma")
|
||||
;; Register form of ssemul/sseadd/ssemuladd insns in V8SF or V4DF mode;
;; default latency 7.
(define_insn_reservation "bdver1_ssemuladd_256" 7
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,bdver1-ffma")
|
||||
;; Load form of ssemul/sseadd/ssemuladd insns with no mode test;
;; default latency 10.
(define_insn_reservation "bdver1_ssemuladd_load" 10
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
|
||||
;; Register form of ssemul/sseadd/ssemuladd insns with no mode test;
;; default latency 6.
(define_insn_reservation "bdver1_ssemuladd" 6
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
|
||||
;; Load form of sseimul insns; default latency 8.
(define_insn_reservation "bdver1_sseimul_load" 8
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fmma")
|
||||
;; Register form of sseimul insns; default latency 4.
(define_insn_reservation "bdver1_sseimul" 4
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmma")
|
||||
;; Load form of sseiadd insns; default latency 6.
(define_insn_reservation "bdver1_sseiadd_load" 6
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
|
||||
;; Register form of sseiadd insns; default latency 2.
(define_insn_reservation "bdver1_sseiadd" 2
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")
|
||||
|
||||
;; SSE DIV: no throughput information (assume same as amdfam10).
|
||||
;; Load form of V4DF-mode ssediv insns; default latency 31.  The final
;; stage holds either bdver1-ffma0 or bdver1-ffma1 for 17 cycles.
(define_insn_reservation "bdver1_ssediv_double_load_256" 31
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
|
||||
;; Register form of V4DF-mode ssediv insns; default latency 27.  The final
;; stage holds either bdver1-ffma0 or bdver1-ffma1 for 17 cycles.
(define_insn_reservation "bdver1_ssediv_double_256" 27
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
|
||||
;; Load form of V8SF-mode ssediv insns; default latency 28.  The final
;; stage holds either bdver1-ffma0 or bdver1-ffma1 for 17 cycles.
(define_insn_reservation "bdver1_ssediv_single_load_256" 28
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
|
||||
;; Register form of V8SF-mode ssediv insns; default latency 24.  The final
;; stage holds either bdver1-ffma0 or bdver1-ffma1 for 17 cycles.
(define_insn_reservation "bdver1_ssediv_single_256" 24
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
|
||||
;; Load form of DF/V2DF-mode ssediv insns; default latency 31.  The final
;; stage holds either bdver1-ffma0 or bdver1-ffma1 for 17 cycles.
(define_insn_reservation "bdver1_ssediv_double_load" 31
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
|
||||
;; Register form of DF/V2DF-mode ssediv insns; default latency 27.  The
;; final stage holds either bdver1-ffma0 or bdver1-ffma1 for 17 cycles.
(define_insn_reservation "bdver1_ssediv_double" 27
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
|
||||
;; Load form of SF/V4SF-mode ssediv insns; default latency 28.  The final
;; stage holds either bdver1-ffma0 or bdver1-ffma1 for 17 cycles.
(define_insn_reservation "bdver1_ssediv_single_load" 28
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
|
||||
;; Register form of SF/V4SF-mode ssediv insns; default latency 24.  The
;; final stage holds either bdver1-ffma0 or bdver1-ffma1 for 17 cycles.
(define_insn_reservation "bdver1_ssediv_single" 24
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
|
||||
|
||||
;; TI-mode sseins insns; default latency 3.  There is no memory-attribute
;; test here, so load and register forms share this reservation.
(define_insn_reservation "bdver1_sseins" 3
  (and (eq_attr "cpu" "bdver1")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
|
||||
|
@ -928,6 +928,7 @@
|
||||
(include "ppro.md")
|
||||
(include "k6.md")
|
||||
(include "athlon.md")
|
||||
(include "bdver1.md")
|
||||
(include "geode.md")
|
||||
(include "atom.md")
|
||||
|
||||
@ -1456,6 +1457,7 @@
|
||||
[(set_attr "length" "1")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
;; Pentium Pro can do steps 1 through 3 in one go.
|
||||
@ -1486,7 +1488,8 @@
|
||||
]
|
||||
(const_string "0")))
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_insn "*cmpfp_i_sse"
|
||||
[(set (reg:CCFP FLAGS_REG)
|
||||
@ -1508,7 +1511,8 @@
|
||||
(const_string "1")
|
||||
(const_string "0")))
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_insn "*cmpfp_i_i387"
|
||||
[(set (reg:CCFP FLAGS_REG)
|
||||
@ -1528,7 +1532,8 @@
|
||||
]
|
||||
(const_string "XF")))
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_insn "*cmpfp_iu_mixed"
|
||||
[(set (reg:CCFPU FLAGS_REG)
|
||||
@ -1556,7 +1561,8 @@
|
||||
]
|
||||
(const_string "0")))
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_insn "*cmpfp_iu_sse"
|
||||
[(set (reg:CCFPU FLAGS_REG)
|
||||
@ -1578,7 +1584,8 @@
|
||||
(const_string "1")
|
||||
(const_string "0")))
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_insn "*cmpfp_iu_387"
|
||||
[(set (reg:CCFPU FLAGS_REG)
|
||||
@ -1598,7 +1605,8 @@
|
||||
]
|
||||
(const_string "XF")))
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "direct")])
|
||||
|
||||
;; Push/pop instructions.
|
||||
|
||||
@ -2352,7 +2360,8 @@
|
||||
(set_attr "mode" "<MODE>")
|
||||
(set_attr "pent_pair" "np")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "double")])
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_insn "*swap<mode>_1"
|
||||
[(set (match_operand:SWI12 0 "register_operand" "+r")
|
||||
@ -2365,7 +2374,8 @@
|
||||
(set_attr "mode" "SI")
|
||||
(set_attr "pent_pair" "np")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "double")])
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL
|
||||
;; is disabled for AMDFAM10
|
||||
@ -4560,7 +4570,8 @@
|
||||
(set_attr "prefix_rex" "1")
|
||||
(set_attr "mode" "<MODE>")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")])
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "bdver1_decode" "double,double")])
|
||||
|
||||
(define_insn "fix_trunc<mode>si_sse"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r,r")
|
||||
@ -4572,7 +4583,8 @@
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "<MODE>")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")])
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "bdver1_decode" "double,double")])
|
||||
|
||||
;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
|
||||
(define_peephole2
|
||||
@ -4827,7 +4839,8 @@
|
||||
[(set (attr "length")
|
||||
(symbol_ref "ix86_attr_length_address_default (insn) + 2"))
|
||||
(set_attr "mode" "HI")
|
||||
(set_attr "unit" "i387")])
|
||||
(set_attr "unit" "i387")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
(define_insn "x86_fldcw_1"
|
||||
[(set (reg:HI FPCR_REG)
|
||||
@ -4839,7 +4852,8 @@
|
||||
(set_attr "mode" "HI")
|
||||
(set_attr "unit" "i387")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "vector")])
|
||||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
;; Conversion between fixed point and floating point.
|
||||
|
||||
@ -4993,6 +5007,7 @@
|
||||
(set_attr "unit" "*,i387,*,*,*")
|
||||
(set_attr "athlon_decode" "*,*,double,direct,double")
|
||||
(set_attr "amdfam10_decode" "*,*,vector,double,double")
|
||||
(set_attr "bdver1_decode" "*,*,double,direct,double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*floatsi<mode>2_vector_mixed"
|
||||
@ -5008,6 +5023,7 @@
|
||||
(set_attr "unit" "i387,*")
|
||||
(set_attr "athlon_decode" "*,direct")
|
||||
(set_attr "amdfam10_decode" "*,double")
|
||||
(set_attr "bdver1_decode" "*,direct")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_with_temp"
|
||||
@ -5023,6 +5039,7 @@
|
||||
(set_attr "unit" "*,i387,*,*")
|
||||
(set_attr "athlon_decode" "*,*,double,direct")
|
||||
(set_attr "amdfam10_decode" "*,*,vector,double")
|
||||
(set_attr "bdver1_decode" "*,*,double,direct")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_split
|
||||
@ -5075,6 +5092,7 @@
|
||||
(set_attr "unit" "i387,*,*")
|
||||
(set_attr "athlon_decode" "*,double,direct")
|
||||
(set_attr "amdfam10_decode" "*,vector,double")
|
||||
(set_attr "bdver1_decode" "*,double,direct")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit"
|
||||
@ -5098,6 +5116,7 @@
|
||||
(const_string "*")))
|
||||
(set_attr "athlon_decode" "*,direct")
|
||||
(set_attr "amdfam10_decode" "*,double")
|
||||
(set_attr "bdver1_decode" "*,direct")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*floatsi<mode>2_vector_sse_with_temp"
|
||||
@ -5112,6 +5131,7 @@
|
||||
(set_attr "mode" "<MODE>,<MODE>,<ssevecmode>")
|
||||
(set_attr "athlon_decode" "double,direct,double")
|
||||
(set_attr "amdfam10_decode" "vector,double,double")
|
||||
(set_attr "bdver1_decode" "double,direct,double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*floatsi<mode>2_vector_sse"
|
||||
@ -5124,6 +5144,7 @@
|
||||
(set_attr "mode" "<MODE>")
|
||||
(set_attr "athlon_decode" "direct")
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_split
|
||||
@ -5259,6 +5280,7 @@
|
||||
(set_attr "mode" "<MODEF:MODE>")
|
||||
(set_attr "athlon_decode" "double,direct")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "bdver1_decode" "double,direct")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit"
|
||||
@ -5280,6 +5302,7 @@
|
||||
(const_string "*")))
|
||||
(set_attr "athlon_decode" "double,direct")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "bdver1_decode" "double,direct")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_split
|
||||
@ -5314,6 +5337,7 @@
|
||||
(const_string "*")))
|
||||
(set_attr "athlon_decode" "direct")
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_split
|
||||
@ -6851,6 +6875,8 @@
|
||||
;; IMUL reg32/64, mem32/64, imm32 VectorPath
|
||||
;; IMUL reg32/64, reg32/64 Direct
|
||||
;; IMUL reg32/64, mem32/64 Direct
|
||||
;;
|
||||
;; On BDVER1, all above IMULs use DirectPath
|
||||
|
||||
(define_insn "*mul<mode>3_1"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r,r,r")
|
||||
@ -6879,6 +6905,7 @@
|
||||
(match_operand 1 "memory_operand" ""))
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*mulsi3_1_zext"
|
||||
@ -6909,6 +6936,7 @@
|
||||
(match_operand 1 "memory_operand" ""))
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
;; On AMDFAM10
|
||||
@ -6918,6 +6946,8 @@
|
||||
;; IMUL reg16, mem16, imm16 VectorPath
|
||||
;; IMUL reg16, reg16 Direct
|
||||
;; IMUL reg16, mem16 Direct
|
||||
;;
|
||||
;; On BDVER1, all HI MULs use DoublePath
|
||||
|
||||
(define_insn "*mulhi3_1"
|
||||
[(set (match_operand:HI 0 "register_operand" "=r,r,r")
|
||||
@ -6942,9 +6972,10 @@
|
||||
(cond [(eq_attr "alternative" "0,1")
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
(set_attr "bdver1_decode" "double")
|
||||
(set_attr "mode" "HI")])
|
||||
|
||||
;;On AMDFAM10
|
||||
;;On AMDFAM10 and BDVER1
|
||||
;; MUL reg8 Direct
|
||||
;; MUL mem8 Direct
|
||||
|
||||
@ -6963,6 +6994,7 @@
|
||||
(const_string "vector")
|
||||
(const_string "direct")))
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "QI")])
|
||||
|
||||
(define_expand "<u>mul<mode><dwi>3"
|
||||
@ -7001,6 +7033,7 @@
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*<u>mulqihi3_1"
|
||||
@ -7021,6 +7054,7 @@
|
||||
(const_string "vector")
|
||||
(const_string "direct")))
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "QI")])
|
||||
|
||||
(define_expand "<s>mul<mode>3_highpart"
|
||||
@ -7060,6 +7094,7 @@
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "*<s>mulsi3_highpart_1"
|
||||
@ -7083,6 +7118,7 @@
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*<s>mulsi3_highpart_zext"
|
||||
@ -7106,6 +7142,7 @@
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
;; The patterns that match these are at the end of this file.
|
||||
@ -9094,7 +9131,8 @@
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "mode" "DI")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "vector")])
|
||||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
(define_insn "x86_shld"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
|
||||
@ -9110,7 +9148,8 @@
|
||||
(set_attr "mode" "SI")
|
||||
(set_attr "pent_pair" "np")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "vector")])
|
||||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
(define_expand "x86_shift<mode>_adj_1"
|
||||
[(set (reg:CCZ FLAGS_REG)
|
||||
@ -9791,7 +9830,8 @@
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "mode" "DI")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "vector")])
|
||||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
(define_insn "x86_shrd"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
|
||||
@ -9807,7 +9847,8 @@
|
||||
(set_attr "mode" "SI")
|
||||
(set_attr "pent_pair" "np")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "vector")])
|
||||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
(define_insn "ashrdi3_cvt"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
|
||||
@ -12931,7 +12972,8 @@
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "mode" "XF")
|
||||
(set_attr "athlon_decode" "direct")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "direct")])
|
||||
|
||||
(define_insn "sqrt_extend<mode>xf2_i387"
|
||||
[(set (match_operand:XF 0 "register_operand" "=f")
|
||||
@ -12943,7 +12985,8 @@
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "mode" "XF")
|
||||
(set_attr "athlon_decode" "direct")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "direct")])
|
||||
|
||||
(define_insn "*rsqrtsf2_sse"
|
||||
[(set (match_operand:SF 0 "register_operand" "=x")
|
||||
@ -12977,7 +13020,8 @@
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "<MODE>")
|
||||
(set_attr "athlon_decode" "*")
|
||||
(set_attr "amdfam10_decode" "*")])
|
||||
(set_attr "amdfam10_decode" "*")
|
||||
(set_attr "bdver1_decode" "*")])
|
||||
|
||||
(define_expand "sqrt<mode>2"
|
||||
[(set (match_operand:MODEF 0 "register_operand" "")
|
||||
|
@ -2269,6 +2269,7 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "vector,double")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "bdver1_decode" "double,direct")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_insn "*avx_cvtsi2ssq"
|
||||
@ -2298,6 +2299,7 @@
|
||||
(set_attr "prefix_rex" "1")
|
||||
(set_attr "athlon_decode" "vector,double")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "bdver1_decode" "double,direct")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_insn "sse_cvtss2si"
|
||||
@ -2311,6 +2313,7 @@
|
||||
"%vcvtss2si\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "bdver1_decode" "double,double")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "SI")])
|
||||
@ -2324,6 +2327,7 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "bdver1_decode" "double,double")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "SI")])
|
||||
@ -2339,6 +2343,7 @@
|
||||
"%vcvtss2si{q}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "bdver1_decode" "double,double")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "DI")])
|
||||
@ -2352,6 +2357,7 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "bdver1_decode" "double,double")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "DI")])
|
||||
@ -2367,6 +2373,7 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "bdver1_decode" "double,double")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "SI")])
|
||||
@ -2382,6 +2389,7 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "bdver1_decode" "double,double")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "DI")])
|
||||
@ -2496,7 +2504,8 @@
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "unit" "mmx")
|
||||
(set_attr "prefix_data16" "1")
|
||||
(set_attr "mode" "DI")])
|
||||
(set_attr "mode" "DI")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_insn "sse2_cvttpd2pi"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=y")
|
||||
@ -2506,7 +2515,8 @@
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "unit" "mmx")
|
||||
(set_attr "prefix_data16" "1")
|
||||
(set_attr "mode" "TI")])
|
||||
(set_attr "mode" "TI")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_insn "*avx_cvtsi2sd"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x")
|
||||
@ -2533,7 +2543,8 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "athlon_decode" "double,direct")
|
||||
(set_attr "amdfam10_decode" "vector,double")])
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "bdver1_decode" "double,direct")])
|
||||
|
||||
(define_insn "*avx_cvtsi2sdq"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x")
|
||||
@ -2562,7 +2573,8 @@
|
||||
(set_attr "prefix_rex" "1")
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "athlon_decode" "double,direct")
|
||||
(set_attr "amdfam10_decode" "vector,double")])
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "bdver1_decode" "double,direct")])
|
||||
|
||||
(define_insn "sse2_cvtsd2si"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r,r")
|
||||
@ -2575,6 +2587,7 @@
|
||||
"%vcvtsd2si\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "bdver1_decode" "double,double")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "SI")])
|
||||
@ -2588,6 +2601,7 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "bdver1_decode" "double,double")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "SI")])
|
||||
@ -2603,6 +2617,7 @@
|
||||
"%vcvtsd2siq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "bdver1_decode" "double,double")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "DI")])
|
||||
@ -2616,6 +2631,7 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "bdver1_decode" "double,double")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "DI")])
|
||||
@ -2633,7 +2649,8 @@
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "SI")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")])
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "bdver1_decode" "double,double")])
|
||||
|
||||
(define_insn "sse2_cvttsd2siq"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,r")
|
||||
@ -2648,7 +2665,8 @@
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "DI")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")])
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "bdver1_decode" "double,double")])
|
||||
|
||||
(define_insn "avx_cvtdq2pd256"
|
||||
[(set (match_operand:V4DF 0 "register_operand" "=x")
|
||||
@ -2716,7 +2734,8 @@
|
||||
(set_attr "prefix_data16" "0")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "TI")
|
||||
(set_attr "amdfam10_decode" "double")])
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_insn "avx_cvttpd2dq256"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=x")
|
||||
@ -2746,7 +2765,8 @@
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "TI")
|
||||
(set_attr "amdfam10_decode" "double")])
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_insn "*avx_cvtsd2ss"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
@ -2775,6 +2795,7 @@
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "athlon_decode" "vector,double")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "bdver1_decode" "direct,direct")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_insn "*avx_cvtss2sd"
|
||||
@ -2805,6 +2826,7 @@
|
||||
"cvtss2sd\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "bdver1_decode" "direct,direct")
|
||||
(set_attr "mode" "DF")])
|
||||
|
||||
(define_insn "avx_cvtpd2ps256"
|
||||
@ -2839,7 +2861,8 @@
|
||||
(set_attr "prefix_data16" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "V4SF")
|
||||
(set_attr "amdfam10_decode" "double")])
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_insn "avx_cvtps2pd256"
|
||||
[(set (match_operand:V4DF 0 "register_operand" "=x")
|
||||
@ -2875,7 +2898,8 @@
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "V2DF")
|
||||
(set_attr "prefix_data16" "0")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "bdver1_decode" "double")])
|
||||
|
||||
(define_expand "vec_unpacks_hi_v4sf"
|
||||
[(set (match_dup 2)
|
||||
|
Loading…
x
Reference in New Issue
Block a user