mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-03 05:10:26 +08:00
Add Power10 scheduling description.
2021-03-19  Pat Haugen  <pthaugen@linux.ibm.com>

gcc/
	* config/rs6000/rs6000.c (power10_cost): New.
	(rs6000_option_override_internal): Set Power10 costs.
	(rs6000_issue_rate): Set Power10 issue rate.
	* config/rs6000/power10.md: Rewrite for Power10.
This commit is contained in:
parent
b8ecdc7727
commit
e1df2c3436
@ -1,9 +1,7 @@
|
||||
;; Scheduling description for IBM POWER10 processor.
|
||||
;; Copyright (C) 2016-2021 Free Software Foundation, Inc.
|
||||
;; Scheduling description for the IBM POWER10 processor.
|
||||
;; Copyright (C) 2020-2021 Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This is a clone of power9.md. It is intended to be a placeholder until a
|
||||
;; real scheduler model can be contributed.
|
||||
;; The original power9.md was contributed by Pat Haugen (pthaugen@us.ibm.com).
|
||||
;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
|
||||
|
||||
;; This file is part of GCC.
|
||||
;;
|
||||
@ -21,240 +19,215 @@
|
||||
;; along with GCC; see the file COPYING3. If not see
|
||||
;; <http://www.gnu.org/licenses/>.
|
||||
|
||||
;; This file was cloned from power9.md; it does not (yet) describe the actual
|
||||
;; POWER10 processor.
|
||||
; For Power10 we model (and try to pack) the in-order decode/dispatch groups
|
||||
; which consist of 8 instructions max. We do not try to model the details of
|
||||
; the out-of-order issue queues and how insns flow to the various execution
|
||||
; units except for the simple representation of the issue limitation of at
|
||||
; most 4 insns to the execution units/2 insns to the load units/2 insns to
|
||||
; the store units.
|
||||
(define_automaton "power10dispatch,power10issue")
|
||||
|
||||
(define_automaton "power10dsp,power10lsu,power10vsu,power10fpdiv,power10misc")
|
||||
; Decode/dispatch slots
|
||||
(define_cpu_unit "du0_power10,du1_power10,du2_power10,du3_power10,
|
||||
du4_power10,du5_power10,du6_power10,du7_power10" "power10dispatch")
|
||||
|
||||
(define_cpu_unit "lsu0_power10,lsu1_power10,lsu2_power10,lsu3_power10" "power10lsu")
|
||||
(define_cpu_unit "vsu0_power10,vsu1_power10,vsu2_power10,vsu3_power10" "power10vsu")
|
||||
; Two vector permute units, part of vsu
|
||||
(define_cpu_unit "prm0_power10,prm1_power10" "power10vsu")
|
||||
; Two fixed point divide units, not pipelined
|
||||
(define_cpu_unit "fx_div0_power10,fx_div1_power10" "power10misc")
|
||||
(define_cpu_unit "bru_power10,cryptu_power10,dfu_power10" "power10misc")
|
||||
; Create a false unit for use by non-pipelined FP div/sqrt
|
||||
(define_cpu_unit "fp_div0_power10,fp_div1_power10,fp_div2_power10,fp_div3_power10"
|
||||
"power10fpdiv")
|
||||
; Four execution units
|
||||
(define_cpu_unit "exu0_power10,exu1_power10,exu2_power10,exu3_power10"
|
||||
"power10issue")
|
||||
; Two load units and two store units
|
||||
(define_cpu_unit "lu0_power10,lu1_power10" "power10issue")
|
||||
(define_cpu_unit "stu0_power10,stu1_power10" "power10issue")
|
||||
|
||||
|
||||
(define_cpu_unit "x0_power10,x1_power10,xa0_power10,xa1_power10,
|
||||
x2_power10,x3_power10,xb0_power10,xb1_power10,
|
||||
br0_power10,br1_power10" "power10dsp")
|
||||
; Dispatch slots are allocated in order conforming to program order.
|
||||
(absence_set "du0_power10" "du1_power10,du2_power10,du3_power10,du4_power10,\
|
||||
du5_power10,du6_power10,du7_power10")
|
||||
(absence_set "du1_power10" "du2_power10,du3_power10,du4_power10,du5_power10,\
|
||||
du6_power10,du7_power10")
|
||||
(absence_set "du2_power10" "du3_power10,du4_power10,du5_power10,du6_power10,\
|
||||
du7_power10")
|
||||
(absence_set "du3_power10" "du4_power10,du5_power10,du6_power10,du7_power10")
|
||||
(absence_set "du4_power10" "du5_power10,du6_power10,du7_power10")
|
||||
(absence_set "du5_power10" "du6_power10,du7_power10")
|
||||
(absence_set "du6_power10" "du7_power10")
|
||||
|
||||
|
||||
; Dispatch port reservations
|
||||
;
|
||||
; The processor can dispatch a maximum of 6 iops per cycle with the following
|
||||
; general restrictions (other restrictions also apply):
|
||||
; 1) At most 2 iops per execution slice
|
||||
; 2) At most 2 iops to the branch unit
|
||||
; Note that insn position in a dispatch group of 6 insns does not imply which
|
||||
; execution slice the insn is routed to. The units are used to infer the
|
||||
; conflicts that exist (i.e. an 'even' requirement will preclude dispatch
|
||||
; with 2 insns with 'superslice' requirement).
|
||||
; Power10 can dispatch a maximum of 8 iops per cycle, with a maximum of
|
||||
; 4 VSU/2 Load/2 Store per cycle.
|
||||
|
||||
; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but
|
||||
; are listed as separate units to allow those insns that preclude its use to
|
||||
; still be scheduled two to a superslice while reserving the 3rd slot. The
|
||||
; same applies for xb0/xb1.
|
||||
(define_reservation "DU_xa_power10" "xa0_power10+xa1_power10")
|
||||
(define_reservation "DU_xb_power10" "xb0_power10+xb1_power10")
|
||||
|
||||
; Any execution slice dispatch
|
||||
; Any dispatch slot
|
||||
(define_reservation "DU_any_power10"
|
||||
"x0_power10|x1_power10|DU_xa_power10|x2_power10|x3_power10|
|
||||
DU_xb_power10")
|
||||
"du0_power10|du1_power10|du2_power10|du3_power10|
|
||||
du4_power10|du5_power10|du6_power10|du7_power10")
|
||||
|
||||
; Even slice, actually takes even/odd slots
|
||||
(define_reservation "DU_even_power10" "x0_power10+x1_power10|x2_power10+x3_power10")
|
||||
; Even slot, actually takes even/odd slots
|
||||
(define_reservation "DU_even_power10"
|
||||
"du0_power10+du1_power10|du2_power10+du3_power10|
|
||||
du4_power10+du5_power10|du6_power10+du7_power10")
|
||||
|
||||
; Slice plus 3rd slot
|
||||
(define_reservation "DU_slice_3_power10"
|
||||
"x0_power10+xa0_power10|x1_power10+xa1_power10|
|
||||
x2_power10+xb0_power10|x3_power10+xb1_power10")
|
||||
|
||||
; Superslice
|
||||
(define_reservation "DU_super_power10"
|
||||
"x0_power10+x1_power10|x2_power10+x3_power10")
|
||||
|
||||
; 2-way cracked
|
||||
(define_reservation "DU_C2_power10" "x0_power10+x1_power10|
|
||||
x1_power10+DU_xa_power10|
|
||||
x1_power10+x2_power10|
|
||||
DU_xa_power10+x2_power10|
|
||||
x2_power10+x3_power10|
|
||||
x3_power10+DU_xb_power10")
|
||||
|
||||
; 2-way cracked plus 3rd slot
|
||||
(define_reservation "DU_C2_3_power10" "x0_power10+x1_power10+xa0_power10|
|
||||
x1_power10+x2_power10+xa1_power10|
|
||||
x2_power10+x3_power10+xb0_power10")
|
||||
|
||||
; 3-way cracked (consumes whole decode/dispatch cycle)
|
||||
(define_reservation "DU_C3_power10"
|
||||
"x0_power10+x1_power10+xa0_power10+xa1_power10+x2_power10+
|
||||
x3_power10+xb0_power10+xb1_power10+br0_power10+br1_power10")
|
||||
|
||||
; Branch ports
|
||||
(define_reservation "DU_branch_power10" "br0_power10|br1_power10")
|
||||
; 4-way cracked (consumes whole decode/dispatch cycle)
|
||||
(define_reservation "DU_all_power10"
|
||||
"du0_power10+du1_power10+du2_power10+du3_power10+
|
||||
du4_power10+du5_power10+du6_power10+du7_power10")
|
||||
|
||||
|
||||
; Execution unit reservations
|
||||
(define_reservation "LSU_power10"
|
||||
"lsu0_power10|lsu1_power10|lsu2_power10|lsu3_power10")
|
||||
(define_reservation "LU_power10"
|
||||
"lu0_power10|lu1_power10")
|
||||
|
||||
(define_reservation "LSU_pair_power10"
|
||||
"lsu0_power10+lsu1_power10|lsu1_power10+lsu2_power10|
|
||||
lsu2_power10+lsu3_power10|lsu3_power10+lsu0_power10")
|
||||
(define_reservation "STU_power10"
|
||||
"stu0_power10|stu1_power10")
|
||||
|
||||
(define_reservation "VSU_power10"
|
||||
"vsu0_power10|vsu1_power10|vsu2_power10|vsu3_power10")
|
||||
; Certain simple fixed-point insns can execute in the Store-agen pipe
|
||||
(define_reservation "SXU_power10"
|
||||
"stu0_power10|stu1_power10")
|
||||
|
||||
(define_reservation "VSU_super_power10"
|
||||
"vsu0_power10+vsu1_power10|vsu2_power10+vsu3_power10")
|
||||
(define_reservation "EXU_power10"
|
||||
"exu0_power10|exu1_power10|exu2_power10|exu3_power10")
|
||||
|
||||
(define_reservation "VSU_PRM_power10" "prm0_power10|prm1_power10")
|
||||
|
||||
; Define the reservation to be used by FP div/sqrt which allows other insns
|
||||
; to be issued to the VSU, but blocks other div/sqrt for a number of cycles.
|
||||
; Note that the number of cycles blocked varies depending on insn, but we
|
||||
; just use the same number for all in order to keep the number of DFA states
|
||||
; reasonable.
|
||||
(define_reservation "FP_DIV_power10"
|
||||
"fp_div0_power10*8|fp_div1_power10*8|fp_div2_power10*8|
|
||||
fp_div3_power10*8")
|
||||
(define_reservation "VEC_DIV_power10"
|
||||
"fp_div0_power10*8+fp_div1_power10*8|
|
||||
fp_div2_power10*8+fp_div3_power10*8")
|
||||
(define_reservation "EXU_super_power10"
|
||||
"exu0_power10+exu1_power10|exu2_power10+exu3_power10")
|
||||
|
||||
|
||||
; LS Unit
|
||||
; Load Unit
|
||||
(define_insn_reservation "power10-load" 4
|
||||
(and (eq_attr "type" "load")
|
||||
(eq_attr "sign_extend" "no")
|
||||
(eq_attr "update" "no")
|
||||
(eq_attr "size" "!128")
|
||||
(eq_attr "prefixed" "no")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,LSU_power10")
|
||||
"DU_any_power10,LU_power10")
|
||||
|
||||
(define_insn_reservation "power10-prefixed-load" 4
|
||||
(and (eq_attr "type" "load")
|
||||
(eq_attr "update" "no")
|
||||
(eq_attr "size" "!128")
|
||||
(eq_attr "prefixed" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,LU_power10")
|
||||
|
||||
(define_insn_reservation "power10-load-update" 4
|
||||
(and (eq_attr "type" "load")
|
||||
(eq_attr "sign_extend" "no")
|
||||
(eq_attr "update" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_power10,LSU_power10+VSU_power10")
|
||||
|
||||
(define_insn_reservation "power10-load-ext" 6
|
||||
(and (eq_attr "type" "load")
|
||||
(eq_attr "sign_extend" "yes")
|
||||
(eq_attr "update" "no")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_power10,LSU_power10")
|
||||
|
||||
(define_insn_reservation "power10-load-ext-update" 6
|
||||
(and (eq_attr "type" "load")
|
||||
(eq_attr "sign_extend" "yes")
|
||||
(eq_attr "update" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C3_power10,LSU_power10+VSU_power10")
|
||||
"DU_even_power10,LU_power10+SXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-fpload-double" 4
|
||||
(and (eq_attr "type" "fpload")
|
||||
(eq_attr "update" "no")
|
||||
(eq_attr "size" "64")
|
||||
(eq_attr "prefixed" "no")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,LSU_power10")
|
||||
"DU_any_power10,LU_power10")
|
||||
|
||||
(define_insn_reservation "power10-prefixed-fpload-double" 4
|
||||
(and (eq_attr "type" "fpload")
|
||||
(eq_attr "update" "no")
|
||||
(eq_attr "size" "64")
|
||||
(eq_attr "prefixed" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,LU_power10")
|
||||
|
||||
(define_insn_reservation "power10-fpload-update-double" 4
|
||||
(and (eq_attr "type" "fpload")
|
||||
(eq_attr "update" "yes")
|
||||
(eq_attr "size" "64")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_3_power10,LSU_power10+VSU_power10")
|
||||
"DU_even_power10,LU_power10+SXU_power10")
|
||||
|
||||
; SFmode loads are cracked and have additional 2 cycles over DFmode
|
||||
(define_insn_reservation "power10-fpload-single" 6
|
||||
; SFmode loads are cracked and have an additional 3 cycles over DFmode
|
||||
; Prefixed forms behave the same
|
||||
(define_insn_reservation "power10-fpload-single" 7
|
||||
(and (eq_attr "type" "fpload")
|
||||
(eq_attr "update" "no")
|
||||
(eq_attr "size" "32")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_3_power10,LSU_power10")
|
||||
"DU_even_power10,LU_power10")
|
||||
|
||||
(define_insn_reservation "power10-fpload-update-single" 6
|
||||
(define_insn_reservation "power10-fpload-update-single" 7
|
||||
(and (eq_attr "type" "fpload")
|
||||
(eq_attr "update" "yes")
|
||||
(eq_attr "size" "32")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C3_power10,LSU_power10+VSU_power10")
|
||||
"DU_even_power10,LU_power10+SXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-vecload" 5
|
||||
(define_insn_reservation "power10-vecload" 4
|
||||
(and (eq_attr "type" "vecload")
|
||||
(eq_attr "size" "!256")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,LSU_pair_power10")
|
||||
"DU_any_power10,LU_power10")
|
||||
|
||||
; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store
|
||||
; lxvp
|
||||
(define_insn_reservation "power10-vecload-pair" 4
|
||||
(and (eq_attr "type" "vecload")
|
||||
(eq_attr "size" "256")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,LU_power10+SXU_power10")
|
||||
|
||||
; Store Unit
|
||||
(define_insn_reservation "power10-store" 0
|
||||
(and (eq_attr "type" "store")
|
||||
(and (eq_attr "type" "store,fpstore,vecstore")
|
||||
(eq_attr "update" "no")
|
||||
(eq_attr "indexed" "no")
|
||||
(eq_attr "prefixed" "no")
|
||||
(eq_attr "size" "!128")
|
||||
(eq_attr "size" "!256")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,LSU_power10")
|
||||
"DU_any_power10,STU_power10")
|
||||
|
||||
(define_insn_reservation "power10-store-indexed" 0
|
||||
(and (eq_attr "type" "store")
|
||||
(eq_attr "update" "no")
|
||||
(eq_attr "indexed" "yes")
|
||||
(define_insn_reservation "power10-prefixed-store" 0
|
||||
(and (eq_attr "type" "store,fpstore,vecstore")
|
||||
(eq_attr "prefixed" "yes")
|
||||
(eq_attr "size" "!128")
|
||||
(eq_attr "size" "!256")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,LSU_power10")
|
||||
"DU_even_power10,STU_power10")
|
||||
|
||||
; Update forms have 2 cycle latency for updated addr reg
|
||||
(define_insn_reservation "power10-store-update" 2
|
||||
(and (eq_attr "type" "store")
|
||||
(eq_attr "update" "yes")
|
||||
(eq_attr "indexed" "no")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_3_power10,LSU_power10+VSU_power10")
|
||||
|
||||
; Update forms have 2 cycle latency for updated addr reg
|
||||
(define_insn_reservation "power10-store-update-indexed" 2
|
||||
(and (eq_attr "type" "store")
|
||||
(eq_attr "update" "yes")
|
||||
(eq_attr "indexed" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_3_power10,LSU_power10+VSU_power10")
|
||||
|
||||
(define_insn_reservation "power10-fpstore" 0
|
||||
(and (eq_attr "type" "fpstore")
|
||||
(eq_attr "update" "no")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,LSU_power10")
|
||||
|
||||
; Update forms have 2 cycle latency for updated addr reg
|
||||
(define_insn_reservation "power10-fpstore-update" 2
|
||||
(and (eq_attr "type" "fpstore")
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(eq_attr "update" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_3_power10,LSU_power10+VSU_power10")
|
||||
"DU_any_power10,STU_power10")
|
||||
|
||||
(define_insn_reservation "power10-vecstore" 0
|
||||
; stxvp
|
||||
(define_insn_reservation "power10-vecstore-pair" 0
|
||||
(and (eq_attr "type" "vecstore")
|
||||
(eq_attr "size" "256")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,LSU_pair_power10")
|
||||
"DU_even_power10,stu0_power10+stu1_power10")
|
||||
|
||||
(define_insn_reservation "power10-larx" 4
|
||||
(and (eq_attr "type" "load_l")
|
||||
(eq_attr "size" "!128")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,LSU_power10")
|
||||
"DU_any_power10,LU_power10")
|
||||
|
||||
(define_insn_reservation "power10-stcx" 2
|
||||
; All load quad forms
|
||||
(define_insn_reservation "power10-lq" 4
|
||||
(and (eq_attr "type" "load,load_l")
|
||||
(eq_attr "size" "128")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,LU_power10+SXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-stcx" 0
|
||||
(and (eq_attr "type" "store_c")
|
||||
(eq_attr "size" "!128")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_3_power10,LSU_power10+VSU_power10")
|
||||
"DU_any_power10,STU_power10")
|
||||
|
||||
(define_insn_reservation "power10-sync" 4
|
||||
; All store quad forms
|
||||
(define_insn_reservation "power10-stq" 0
|
||||
(and (eq_attr "type" "store,store_c")
|
||||
(eq_attr "size" "128")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,stu0_power10+stu1_power10")
|
||||
|
||||
(define_insn_reservation "power10-sync" 1
|
||||
(and (eq_attr "type" "sync,isync")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,LSU_power10")
|
||||
"DU_even_power10,STU_power10")
|
||||
|
||||
|
||||
; VSU Execution Unit
|
||||
@ -264,258 +237,302 @@
|
||||
; Most ALU insns are simple 2 cycle, including record form
|
||||
(define_insn_reservation "power10-alu" 2
|
||||
(and (eq_attr "type" "add,exts,integer,logical,isel")
|
||||
(eq_attr "prefixed" "no")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,VSU_power10")
|
||||
; 5 cycle CR latency
|
||||
(define_bypass 5 "power10-alu"
|
||||
"DU_any_power10,EXU_power10")
|
||||
; 4 cycle CR latency
|
||||
(define_bypass 4 "power10-alu"
|
||||
"power10-crlogical,power10-mfcr,power10-mfcrf")
|
||||
|
||||
; Rotate/shift prevent use of third slot
|
||||
; paddi
|
||||
(define_insn_reservation "power10-paddi" 2
|
||||
(and (eq_attr "type" "add")
|
||||
(eq_attr "prefixed" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,EXU_power10")
|
||||
|
||||
; Rotate/shift (non-record form)
|
||||
(define_insn_reservation "power10-rot" 2
|
||||
(and (eq_attr "type" "insert,shift")
|
||||
(eq_attr "dot" "no")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
; Record form rotate/shift are cracked
|
||||
(define_insn_reservation "power10-cracked-alu" 2
|
||||
; Record form rotate/shift
|
||||
(define_insn_reservation "power10-rot-compare" 3
|
||||
(and (eq_attr "type" "insert,shift")
|
||||
(eq_attr "dot" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_3_power10,VSU_power10")
|
||||
; 7 cycle CR latency
|
||||
(define_bypass 7 "power10-cracked-alu"
|
||||
"DU_any_power10,EXU_power10")
|
||||
; 5 cycle CR latency
|
||||
(define_bypass 5 "power10-rot-compare"
|
||||
"power10-crlogical,power10-mfcr,power10-mfcrf")
|
||||
|
||||
(define_insn_reservation "power10-alu2" 3
|
||||
(and (eq_attr "type" "cntlz,popcnt,trap")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,VSU_power10")
|
||||
; 6 cycle CR latency
|
||||
(define_bypass 6 "power10-alu2"
|
||||
"DU_any_power10,EXU_power10")
|
||||
; 5 cycle CR latency
|
||||
(define_bypass 5 "power10-alu2"
|
||||
"power10-crlogical,power10-mfcr,power10-mfcrf")
|
||||
|
||||
(define_insn_reservation "power10-cmp" 2
|
||||
(and (eq_attr "type" "cmp")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,VSU_power10")
|
||||
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
; Treat 'two' and 'three' types as 2 or 3 way cracked
|
||||
(define_insn_reservation "power10-two" 4
|
||||
(and (eq_attr "type" "two")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_power10,VSU_power10")
|
||||
"DU_even_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-three" 6
|
||||
(and (eq_attr "type" "three")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C3_power10,VSU_power10")
|
||||
"DU_all_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-mul" 5
|
||||
(and (eq_attr "type" "mul")
|
||||
(eq_attr "dot" "no")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
; 4 cycle MUL->MUL latency
|
||||
(define_bypass 4 "power10-mul"
|
||||
"power10-mul,power10-mul-compare")
|
||||
|
||||
(define_insn_reservation "power10-mul-compare" 5
|
||||
(and (eq_attr "type" "mul")
|
||||
(eq_attr "dot" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_3_power10,VSU_power10")
|
||||
; 10 cycle CR latency
|
||||
(define_bypass 10 "power10-mul-compare"
|
||||
"DU_even_power10,EXU_power10")
|
||||
; 4 cycle MUL->MUL latency
|
||||
(define_bypass 4 "power10-mul-compare"
|
||||
"power10-mul,power10-mul-compare")
|
||||
; 7 cycle CR latency
|
||||
(define_bypass 7 "power10-mul-compare"
|
||||
"power10-crlogical,power10-mfcr,power10-mfcrf")
|
||||
|
||||
; Fixed point divides reserve the divide units for a minimum of 8 cycles
|
||||
(define_insn_reservation "power10-idiv" 16
|
||||
(define_insn_reservation "power10-div" 12
|
||||
(and (eq_attr "type" "div")
|
||||
(eq_attr "size" "32")
|
||||
(eq_attr "dot" "no")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,fx_div0_power10*8|fx_div1_power10*8")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-ldiv" 24
|
||||
(define_insn_reservation "power10-div-compare" 12
|
||||
(and (eq_attr "type" "div")
|
||||
(eq_attr "size" "64")
|
||||
(eq_attr "dot" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,fx_div0_power10*8|fx_div1_power10*8")
|
||||
"DU_even_power10,EXU_power10")
|
||||
; 14 cycle CR latency
|
||||
(define_bypass 14 "power10-div-compare"
|
||||
"power10-crlogical,power10-mfcr,power10-mfcrf")
|
||||
|
||||
(define_insn_reservation "power10-crlogical" 2
|
||||
(and (eq_attr "type" "cr_logical")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-mfcrf" 2
|
||||
(and (eq_attr "type" "mfcrf")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-mfcr" 6
|
||||
(define_insn_reservation "power10-mfcr" 3
|
||||
(and (eq_attr "type" "mfcr")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C3_power10,VSU_power10")
|
||||
"DU_even_power10,EXU_power10")
|
||||
|
||||
; Should differentiate between 1 cr field and > 1 since target of > 1 cr
|
||||
; is cracked
|
||||
(define_insn_reservation "power10-mtcr" 2
|
||||
(define_insn_reservation "power10-mtcr" 3
|
||||
(and (eq_attr "type" "mtcr")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
; Moves to LR/CTR are executed in the VSU
|
||||
(define_insn_reservation "power10-mtjmpr" 5
|
||||
(define_insn_reservation "power10-mtjmpr" 3
|
||||
(and (eq_attr "type" "mtjmpr")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-mfjmpr" 2
|
||||
(and (eq_attr "type" "mfjmpr")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
|
||||
; Floating point/Vector ops
|
||||
(define_insn_reservation "power10-fpsimple" 2
|
||||
|
||||
(define_insn_reservation "power10-fpsimple" 3
|
||||
(and (eq_attr "type" "fpsimple")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-fp" 5
|
||||
(and (eq_attr "type" "fp,dmul")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-fpcompare" 3
|
||||
(and (eq_attr "type" "fpcompare")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
; FP div/sqrt are executed in the VSU slices. They are not pipelined wrt other
|
||||
; div/sqrt insns, but for the most part do not block pipelined ops.
|
||||
(define_insn_reservation "power10-sdiv" 22
|
||||
(and (eq_attr "type" "sdiv")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,VSU_power10,FP_DIV_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-ddiv" 27
|
||||
(and (eq_attr "type" "ddiv")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,VSU_power10,FP_DIV_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-sqrt" 26
|
||||
(and (eq_attr "type" "ssqrt")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,VSU_power10,FP_DIV_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-dsqrt" 36
|
||||
(and (eq_attr "type" "dsqrt")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,VSU_power10,FP_DIV_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-vec-2cyc" 2
|
||||
(and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,VSU_super_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-veccmp" 3
|
||||
(and (eq_attr "type" "veccmp")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,VSU_super_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-vecsimple" 3
|
||||
(define_insn_reservation "power10-vecsimple" 2
|
||||
(and (eq_attr "type" "vecsimple")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,VSU_super_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-vecnormal" 7
|
||||
(define_insn_reservation "power10-vecnormal" 5
|
||||
(and (eq_attr "type" "vecfloat,vecdouble")
|
||||
(eq_attr "size" "!128")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,VSU_super_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
; Quad-precision FP ops, execute in DFU
|
||||
(define_insn_reservation "power10-qp" 12
|
||||
(and (eq_attr "type" "vecfloat,vecdouble")
|
||||
(eq_attr "size" "128")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,dfu_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-vecperm" 3
|
||||
(and (eq_attr "type" "vecperm")
|
||||
(eq_attr "prefixed" "no")
|
||||
(eq_attr "dot" "no")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,VSU_PRM_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-veccomplex" 7
|
||||
(define_insn_reservation "power10-vecperm-compare" 3
|
||||
(and (eq_attr "type" "vecperm")
|
||||
(eq_attr "dot" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-prefixed-vecperm" 3
|
||||
(and (eq_attr "type" "vecperm")
|
||||
(eq_attr "prefixed" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-veccomplex" 6
|
||||
(and (eq_attr "type" "veccomplex")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,VSU_super_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-vecfdiv" 24
|
||||
(and (eq_attr "type" "vecfdiv")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,VSU_super_power10,VEC_DIV_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-vecdiv" 27
|
||||
(and (eq_attr "type" "vecdiv")
|
||||
(eq_attr "size" "!128")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,VSU_super_power10,VEC_DIV_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
; Use 8 for DFU reservation on QP div/mul to limit DFA state size
|
||||
(define_insn_reservation "power10-qpdiv" 56
|
||||
(and (eq_attr "type" "vecdiv")
|
||||
(eq_attr "size" "128")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,dfu_power10*8")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-qpmul" 24
|
||||
(and (eq_attr "type" "qmul")
|
||||
(eq_attr "size" "128")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,dfu_power10*8")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-mtvsr" 2
|
||||
(and (eq_attr "type" "mtvsr")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-mfvsr" 2
|
||||
(and (eq_attr "type" "mfvsr")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_slice_3_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
|
||||
; Branch Unit
|
||||
; Move from LR/CTR are executed in BRU but consume a writeback port from an
|
||||
; execution slice.
|
||||
(define_insn_reservation "power10-mfjmpr" 6
|
||||
(and (eq_attr "type" "mfjmpr")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_branch_power10,bru_power10+VSU_power10")
|
||||
|
||||
; Branch is 2 cycles
|
||||
; Branch
|
||||
; Branch is 2 cycles, grouped with STU for issue
|
||||
(define_insn_reservation "power10-branch" 2
|
||||
(and (eq_attr "type" "jmpreg,branch")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_branch_power10,bru_power10")
|
||||
"DU_any_power10,STU_power10")
|
||||
|
||||
|
||||
; Crypto Unit
|
||||
(define_insn_reservation "power10-crypto" 6
|
||||
; Crypto
|
||||
(define_insn_reservation "power10-crypto" 4
|
||||
(and (eq_attr "type" "crypto")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_super_power10,cryptu_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
|
||||
; HTM Unit
|
||||
(define_insn_reservation "power10-htm" 4
|
||||
(and (eq_attr "type" "htm")
|
||||
; HTM
|
||||
(define_insn_reservation "power10-htm" 2
|
||||
(and (eq_attr "type" "htmsimple,htm")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_C2_power10,LSU_power10")
|
||||
|
||||
(define_insn_reservation "power10-htm-simple" 2
|
||||
(and (eq_attr "type" "htmsimple")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,VSU_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
|
||||
; DFP Unit
|
||||
; DFP
|
||||
; Use the minimum 12 cycle latency for all DFP insns
|
||||
(define_insn_reservation "power10-dfp" 12
|
||||
(and (eq_attr "type" "dfp")
|
||||
(eq_attr "size" "!128")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,dfu_power10")
|
||||
"DU_any_power10,EXU_power10")
|
||||
|
||||
(define_insn_reservation "power10-dfpq" 12
|
||||
(and (eq_attr "type" "dfp")
|
||||
(eq_attr "size" "128")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,EXU_power10")
|
||||
|
||||
; MMA
|
||||
(define_insn_reservation "power10-mma" 9
|
||||
(and (eq_attr "type" "mma")
|
||||
(eq_attr "prefixed" "no")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_any_power10,EXU_super_power10")
|
||||
|
||||
(define_insn_reservation "power10-prefixed-mma" 9
|
||||
(and (eq_attr "type" "mma")
|
||||
(eq_attr "prefixed" "yes")
|
||||
(eq_attr "cpu" "power10"))
|
||||
"DU_even_power10,EXU_super_power10")
|
||||
; 4 cycle MMA->MMA latency
|
||||
(define_bypass 4 "power10-mma,power10-prefixed-mma"
|
||||
"power10-mma,power10-prefixed-mma")
|
||||
|
||||
|
||||
|
@ -1080,6 +1080,26 @@ struct processor_costs power9_cost = {
|
||||
COSTS_N_INSNS (3), /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on POWER10 processors. */
|
||||
static const
|
||||
struct processor_costs power10_cost = {
|
||||
COSTS_N_INSNS (2), /* mulsi */
|
||||
COSTS_N_INSNS (2), /* mulsi_const */
|
||||
COSTS_N_INSNS (2), /* mulsi_const9 */
|
||||
COSTS_N_INSNS (2), /* muldi */
|
||||
COSTS_N_INSNS (6), /* divsi */
|
||||
COSTS_N_INSNS (6), /* divdi */
|
||||
COSTS_N_INSNS (2), /* fp */
|
||||
COSTS_N_INSNS (2), /* dmul */
|
||||
COSTS_N_INSNS (11), /* sdiv */
|
||||
COSTS_N_INSNS (13), /* ddiv */
|
||||
128, /* cache line size */
|
||||
32, /* l1 cache */
|
||||
512, /* l2 cache */
|
||||
16, /* prefetch streams */
|
||||
COSTS_N_INSNS (2), /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on POWER A2 processors. */
|
||||
static const
|
||||
struct processor_costs ppca2_cost = {
|
||||
@ -4774,10 +4794,13 @@ rs6000_option_override_internal (bool global_init_p)
|
||||
break;
|
||||
|
||||
case PROCESSOR_POWER9:
|
||||
case PROCESSOR_POWER10:
|
||||
rs6000_cost = &power9_cost;
|
||||
break;
|
||||
|
||||
case PROCESSOR_POWER10:
|
||||
rs6000_cost = &power10_cost;
|
||||
break;
|
||||
|
||||
case PROCESSOR_PPCA2:
|
||||
rs6000_cost = &ppca2_cost;
|
||||
break;
|
||||
@ -18443,8 +18466,9 @@ rs6000_issue_rate (void)
|
||||
case PROCESSOR_POWER8:
|
||||
return 7;
|
||||
case PROCESSOR_POWER9:
|
||||
case PROCESSOR_POWER10:
|
||||
return 6;
|
||||
case PROCESSOR_POWER10:
|
||||
return 8;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user