mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-22 17:40:45 +08:00
cell.md: New file.
2006-11-13 Andrew Pinski <andrew_pinski@playstation.sony.com> * config/rs6000/cell.md: New file. * config/rs6000/rs6000.c (rs6000_cell_dont_microcode): New variable. (ppccell_cost): New cost matrix. (TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD): Define. (rs6000_override_options): Set rs6000_always_hint to false for cell. Also align functions/lables/loops to 8byte for the Cell. Use PROCESSOR_CELL. (rs6000_emit_epilogue): Rename using_mfcr_multiple to using_mtcr_multiple. (rs6000_variable_issue): If the insn is a nonpipelined instruction on the Cell, return 0. (rs6000_adjust_cost): Add Cell cost adjustments. (is_microcoded_insn): Return true for Cell microcoded instructions. (is_nonpipeline_insn): New function. (rs6000_issue_rate): Add PROCESSOR_CELL. (rs6000_use_sched_lookahead): If Cell, then we should look ahead 8 instructions. (rs6000_use_sched_lookahead_guard): New function. (rs6000_sched_reorder): Reorder the ready list, if the second to last ready insn is a nonepipeline insn on the Cell. * config/rs6000/rs6000.h (processor_type): Add PROCESSOR_CELL. (ASM_CPU_SPEC): Add Cell. * config/rs6000/rs6000.md (cpu): Add Cell. (cell_micro): New Attr. Include cell.md From-SVN: r118776
This commit is contained in:
parent
e8116f4026
commit
d296e02ef8
@ -1,3 +1,33 @@
|
||||
2006-11-13 Andrew Pinski <andrew_pinski@playstation.sony.com>
|
||||
|
||||
* config/rs6000/cell.md: New file.
|
||||
* config/rs6000/rs6000.c (rs6000_cell_dont_microcode): New
|
||||
variable.
|
||||
(ppccell_cost): New cost matrix.
|
||||
(TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD): Define.
|
||||
(rs6000_override_options): Set rs6000_always_hint to false
|
||||
for cell. Also align functions/labels/loops to 8byte
|
||||
for the Cell. Use PROCESSOR_CELL.
|
||||
(rs6000_emit_epilogue): Rename using_mfcr_multiple to
|
||||
using_mtcr_multiple.
|
||||
(rs6000_variable_issue): If the insn is a nonpipelined instruction
|
||||
on the Cell, return 0.
|
||||
(rs6000_adjust_cost): Add Cell cost adjustments.
|
||||
(is_microcoded_insn): Return true for Cell microcoded
|
||||
instructions.
|
||||
(is_nonpipeline_insn): New function.
|
||||
(rs6000_issue_rate): Add PROCESSOR_CELL.
|
||||
(rs6000_use_sched_lookahead): If Cell, then we should look ahead 8
|
||||
instructions.
|
||||
(rs6000_use_sched_lookahead_guard): New function.
|
||||
(rs6000_sched_reorder): Reorder the ready list, if the second
|
||||
to last ready insn is a nonpipelined insn on the Cell.
|
||||
* config/rs6000/rs6000.h (processor_type): Add PROCESSOR_CELL.
|
||||
(ASM_CPU_SPEC): Add Cell.
|
||||
* config/rs6000/rs6000.md (cpu): Add Cell.
|
||||
(cell_micro): New Attr.
|
||||
Include cell.md
|
||||
|
||||
2006-11-13 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* configure.ac (ld_vers): Parse GNU ld version 2.17.50.0.3-6 20060715
|
||||
|
401
gcc/config/rs6000/cell.md
Normal file
401
gcc/config/rs6000/cell.md
Normal file
@ -0,0 +1,401 @@
|
||||
;; Scheduling description for cell processor.
|
||||
;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006
|
||||
;; Free Software Foundation, Inc.
|
||||
;; Contributed by Sony Computer Entertainment, Inc.,
|
||||
|
||||
|
||||
;; This file is free software; you can redistribute it and/or modify it under
|
||||
;; the terms of the GNU General Public License as published by the Free
|
||||
;; Software Foundation; either version 2 of the License, or (at your option)
|
||||
;; any later version.
|
||||
|
||||
;; This file is distributed in the hope that it will be useful, but WITHOUT
|
||||
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
;; for more details.
|
||||
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with this file; see the file COPYING. If not, write to the Free
|
||||
;; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
;; 02110-1301, USA.
|
||||
|
||||
;; Sources: BE BOOK4 (/sfs/enc/doc/PPU_BookIV_DD3.0_latest.pdf)
|
||||
|
||||
;; BE Architecture *DD3.0 and DD3.1*
|
||||
;; This file simulates the PPU processor unit back end of the pipeline, manual P24.
|
||||
;; manual P27, stall and flush points
|
||||
;; IU, XU, VSU; the dispatcher decodes and dispatches 2 insns per cycle in program
|
||||
;; order; the grouped addresses are aligned by 8
|
||||
;; This file only simulates the single-thread situation
|
||||
;; XU executes all fixed point insns(3 units, a simple alu, a complex unit,
|
||||
;; and load/store unit)
|
||||
;; VSU executes all scalar floating points insn(a float unit),
|
||||
;; VMX insns(VMX unit, 4 sub units, simple, permute, complex, floating point)
|
||||
|
||||
;; Dual issue combination
|
||||
|
||||
;; FXU LSU BR VMX VMX
|
||||
;; (sx,cx,vsu_fp,fp_arith) (perm,vsu_ls,fp_ls)
|
||||
;;FXU X
|
||||
;;LSU X X X
|
||||
;;BR X
|
||||
;;VMX(sx,cx,vsu_fp,fp_arith) X
|
||||
;;VMX(perm,vsu_ls, fp_ls) X
|
||||
;; X marks an illegal combination.
|
||||
|
||||
;; Dual issue exceptions:
|
||||
;;(1) non-pipelined FXU instr in slot 0
|
||||
;;(2) non-pipelined FPU inst in slot 0
|
||||
;; CSI instr(context-synchronizing insn)
|
||||
;; Microcode insn
|
||||
|
||||
;; BRU unit: bru(none register stall), bru_cr(cr register stall)
|
||||
;; VSU unit: vus(vmx simple), vup(vmx permute), vuc(vmx complex),
|
||||
;; vuf(vmx float), fpu(floats). fpu_div is hypothetical, it is for
|
||||
;; nonpipelined simulation
|
||||
;; micro insns will stall at least 7 cycles to get the first instr from ROM,
|
||||
;; micro instructions are not dual issued.
|
||||
|
||||
;; slot0 is older than slot1
|
||||
;; non-pipelined insn need to be in slot1 to avoid 1cycle stall
|
||||
|
||||
;; There are different stall points
|
||||
;; IB2, only stall one thread if stall here, so try to stall here as much as
|
||||
;; we can
|
||||
;; condition(1) insert nop, OR and ORI instruction form
|
||||
;; condition(2) flush happens, in case of: RAW, WAW, D-ERAT miss, or
|
||||
;; CR0-access while stdcx, or stwcx
|
||||
;; IS2 stall ;; Page91 for details
|
||||
;; VQ8 stall
|
||||
;; IS2 stall can be activated by VQ8 stall and trying to issue a vsu instr to
|
||||
;; the vsu issue queue
|
||||
|
||||
;;(define_automaton "cellxu")
|
||||
|
||||
;;(define_cpu_unit "fxu_cell,lsu_cell,bru_cell,vsu1_cell,vsu2_cell" "cellxu")
|
||||
|
||||
;; ndfa
|
||||
(define_automaton "cellxu,cellvsu,cellbru,cell_mis")
|
||||
|
||||
(define_cpu_unit "fxu_cell,lsu_cell" "cellxu")
|
||||
(define_cpu_unit "bru_cell" "cellbru")
|
||||
(define_cpu_unit "vsu1_cell,vsu2_cell" "cellvsu")
|
||||
|
||||
(define_cpu_unit "slot0,slot1" "cell_mis")
|
||||
|
||||
(absence_set "slot0" "slot1")
|
||||
|
||||
(define_reservation "nonpipeline" "fxu_cell+lsu_cell+vsu1_cell+vsu2_cell")
|
||||
(define_reservation "slot01" "slot0|slot1")
|
||||
|
||||
|
||||
;; Load/store
|
||||
;; lmw, lswi, lswx are only generated for optimize for space, MC,
|
||||
;; these instr are not simulated
|
||||
(define_insn_reservation "cell-load" 2
|
||||
(and (eq_attr "type" "load")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,lsu_cell")
|
||||
|
||||
;; ldux, ldu, lbzux, lbzu, hardware breaks it down to two instrs,
|
||||
;; if with 32bytes alignment, CMC
|
||||
(define_insn_reservation "cell-load-ux" 2
|
||||
(and (eq_attr "type" "load_ux,load_u")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,fxu_cell+lsu_cell")
|
||||
|
||||
;; lha, lhax, lhau, lhaux, lwa, lwax, lwaux, MC, latency unknown
|
||||
;; 11/7, 11/8, 11/12
|
||||
(define_insn_reservation "cell-load-ext" 2
|
||||
(and (eq_attr "type" "load_ext,load_ext_u,load_ext_ux")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,fxu_cell+lsu_cell")
|
||||
|
||||
;;lfs,lfsx,lfd,lfdx, 1 cycle
|
||||
(define_insn_reservation "cell-fpload" 1
|
||||
(and (eq_attr "type" "fpload")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"vsu2_cell+lsu_cell+slot01")
|
||||
|
||||
;; lfsu, lfsux, lfdu, lfdux: 1 cycle (fpr), 2 cycles (gpr).
;; Only the update forms are listed here.  Plain "fpload" is already claimed
;; by the earlier "cell-fpload" reservation, so naming it again in this
;; condition could never select this reservation.
(define_insn_reservation "cell-fpload-update" 1
  (and (eq_attr "type" "fpload_u,fpload_ux")
       (eq_attr "cpu" "cell"))
  "fxu_cell+vsu2_cell+lsu_cell+slot01")
|
||||
|
||||
(define_insn_reservation "cell-vecload" 2
|
||||
(and (eq_attr "type" "vecload")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,vsu2_cell+lsu_cell")
|
||||
|
||||
;;st? stw(MC)
|
||||
(define_insn_reservation "cell-store" 1
|
||||
(and (eq_attr "type" "store")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"lsu_cell+slot01")
|
||||
|
||||
;;stdux, stdu, (hardware breaks into store and add) 2 for update reg
|
||||
(define_insn_reservation "cell-store-update" 1
|
||||
(and (eq_attr "type" "store_ux,store_u")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"fxu_cell+lsu_cell+slot01")
|
||||
|
||||
(define_insn_reservation "cell-fpstore" 1
|
||||
(and (eq_attr "type" "fpstore")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"vsu2_cell+lsu_cell+slot01")
|
||||
|
||||
(define_insn_reservation "cell-fpstore-update" 1
|
||||
(and (eq_attr "type" "fpstore_ux,fpstore_u")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"vsu2_cell+fxu_cell+lsu_cell+slot01")
|
||||
|
||||
(define_insn_reservation "cell-vecstore" 1
|
||||
(and (eq_attr "type" "vecstore")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"vsu2_cell+lsu_cell+slot01")
|
||||
|
||||
;; Integer latency is 2 cycles
|
||||
(define_insn_reservation "cell-integer" 2
|
||||
(and (eq_attr "type" "integer,insert_dword,shift,trap,\
|
||||
var_shift_rotate,cntlz,exts")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,fxu_cell")
|
||||
|
||||
;; Two integer latency is 4 cycles
|
||||
(define_insn_reservation "cell-two" 4
|
||||
(and (eq_attr "type" "two")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,fxu_cell,fxu_cell*2")
|
||||
|
||||
;; Three integer latency is 6 cycles
|
||||
(define_insn_reservation "cell-three" 6
|
||||
(and (eq_attr "type" "three")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,fxu_cell,fxu_cell*4")
|
||||
|
||||
;; rlwimi, alter cr0
|
||||
(define_insn_reservation "cell-insert" 2
|
||||
(and (eq_attr "type" "insert_word")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,fxu_cell")
|
||||
|
||||
;; cmpi, cmpli, cmpla, add, addo, sub, subo, alter cr0
|
||||
(define_insn_reservation "cell-cmp" 1
|
||||
(and (eq_attr "type" "cmp")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"fxu_cell+slot01")
|
||||
|
||||
;; add, addo, sub, subo, alter cr0, rldcli, rlwinm
|
||||
(define_insn_reservation "cell-fast-cmp" 2
|
||||
(and (and (eq_attr "type" "fast_compare,delayed_compare,compare,\
|
||||
var_delayed_compare")
|
||||
(eq_attr "cpu" "cell"))
|
||||
(eq_attr "cell_micro" "not"))
|
||||
"slot01,fxu_cell")
|
||||
|
||||
(define_insn_reservation "cell-cmp-microcoded" 9
|
||||
(and (and (eq_attr "type" "fast_compare,delayed_compare,compare,\
|
||||
var_delayed_compare")
|
||||
(eq_attr "cpu" "cell"))
|
||||
(eq_attr "cell_micro" "always"))
|
||||
"slot0+slot1,fxu_cell,fxu_cell*7")
|
||||
|
||||
;; mulld
|
||||
(define_insn_reservation "cell-lmul" 15
|
||||
(and (eq_attr "type" "lmul")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot1,nonpipeline,nonpipeline*13")
|
||||
|
||||
;; mulld. is microcoded
|
||||
(define_insn_reservation "cell-lmul-cmp" 22
|
||||
(and (eq_attr "type" "lmul_compare")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot0+slot1,nonpipeline,nonpipeline*20")
|
||||
|
||||
;; mulli, 6 cycles
|
||||
(define_insn_reservation "cell-imul23" 6
|
||||
(and (eq_attr "type" "imul2,imul3")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot1,nonpipeline,nonpipeline*4")
|
||||
|
||||
;; mullw, 9
|
||||
(define_insn_reservation "cell-imul" 9
|
||||
(and (eq_attr "type" "imul")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot1,nonpipeline,nonpipeline*7")
|
||||
|
||||
;; divide
|
||||
(define_insn_reservation "cell-idiv" 32
|
||||
(and (eq_attr "type" "idiv")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot1,nonpipeline,nonpipeline*30")
|
||||
|
||||
(define_insn_reservation "cell-ldiv" 64
|
||||
(and (eq_attr "type" "ldiv")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot1,nonpipeline,nonpipeline*62")
|
||||
|
||||
;;mflr and mfctr are pipelined
|
||||
(define_insn_reservation "cell-mfjmpr" 1
|
||||
(and (eq_attr "type" "mfjmpr")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01+bru_cell")
|
||||
|
||||
;;mtlr and mtctr,
|
||||
;;mtspr fully pipelined
|
||||
(define_insn_reservation "cell-mtjmpr" 1
|
||||
(and (eq_attr "type" "mtjmpr")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"bru_cell+slot01")
|
||||
|
||||
;; Branches
|
||||
;; b, ba, bl, bla, unconditional branch always predicts correctly n/a latency
|
||||
;; bcctr, bcctrl, latency 2, actually adjust by be to 4
|
||||
(define_insn_reservation "cell-branch" 1
|
||||
(and (eq_attr "type" "branch")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"bru_cell+slot1")
|
||||
|
||||
(define_insn_reservation "cell-branchreg" 1
|
||||
(and (eq_attr "type" "jmpreg")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"bru_cell+slot1")
|
||||
|
||||
;; cr hazard
|
||||
;; page 90, special cases for CR hazard, only one instr can access cr per cycle
|
||||
;; if insn reads CR following a stwcx, pipeline stall till stwcx finish
|
||||
(define_insn_reservation "cell-crlogical" 1
|
||||
(and (eq_attr "type" "cr_logical,delayed_cr")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"bru_cell+slot01")
|
||||
|
||||
;; mfcrf and mfcr is about 34 cycles and nonpipelined
|
||||
(define_insn_reservation "cell-mfcr" 34
|
||||
(and (eq_attr "type" "mfcrf,mfcr")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot1,nonpipeline,nonpipeline*32")
|
||||
|
||||
;; mtcrf (1 field)
|
||||
(define_insn_reservation "cell-mtcrf" 1
|
||||
(and (eq_attr "type" "mtcr")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"fxu_cell+slot01")
|
||||
|
||||
; Basic FP latency is 10 cycles, throughput is 1/cycle
|
||||
(define_insn_reservation "cell-fp" 10
|
||||
(and (eq_attr "type" "fp,dmul")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,vsu1_cell,vsu1_cell*8")
|
||||
|
||||
(define_insn_reservation "cell-fpcompare" 1
|
||||
(and (eq_attr "type" "fpcompare")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"vsu1_cell+slot01")
|
||||
|
||||
;; sdiv throughput 1/74, not pipelined but only in the FPU
|
||||
(define_insn_reservation "cell-sdiv" 74
|
||||
(and (eq_attr "type" "sdiv,ddiv")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot1,nonpipeline,nonpipeline*72")
|
||||
|
||||
;; fsqrt throughput 1/84, not pipelined but only in the FPU
|
||||
(define_insn_reservation "cell-sqrt" 84
|
||||
(and (eq_attr "type" "ssqrt,dsqrt")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot1,nonpipeline,nonpipeline*82")
|
||||
|
||||
; VMX
|
||||
(define_insn_reservation "cell-vecsimple" 4
|
||||
(and (eq_attr "type" "vecsimple")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,vsu1_cell,vsu1_cell*2")
|
||||
|
||||
;; mult, div, madd
|
||||
(define_insn_reservation "cell-veccomplex" 10
|
||||
(and (eq_attr "type" "veccomplex")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,vsu1_cell,vsu1_cell*8")
|
||||
|
||||
;; TODO: add support for recording instructions
|
||||
(define_insn_reservation "cell-veccmp" 4
|
||||
(and (eq_attr "type" "veccmp")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,vsu1_cell,vsu1_cell*2")
|
||||
|
||||
(define_insn_reservation "cell-vecfloat" 12
|
||||
(and (eq_attr "type" "vecfloat")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,vsu1_cell,vsu1_cell*10")
|
||||
|
||||
(define_insn_reservation "cell-vecperm" 4
|
||||
(and (eq_attr "type" "vecperm")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,vsu2_cell,vsu2_cell*2")
|
||||
|
||||
;; New for 4.2, syncs
|
||||
|
||||
(define_insn_reservation "cell-sync" 11
|
||||
(and (eq_attr "type" "sync")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,lsu_cell,lsu_cell*9")
|
||||
|
||||
(define_insn_reservation "cell-isync" 11
|
||||
(and (eq_attr "type" "isync")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,lsu_cell,lsu_cell*9")
|
||||
|
||||
(define_insn_reservation "cell-load_l" 11
|
||||
(and (eq_attr "type" "load_l")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,lsu_cell,lsu_cell*9")
|
||||
|
||||
(define_insn_reservation "cell-store_c" 11
|
||||
(and (eq_attr "type" "store_c")
|
||||
(eq_attr "cpu" "cell"))
|
||||
"slot01,lsu_cell,lsu_cell*9")
|
||||
|
||||
;; RAW register dependency
|
||||
|
||||
;; addi r3, r3, 1
|
||||
;; lw r4,offset(r3)
|
||||
;; there is a 5 cycle delay for r3 bypassing
|
||||
;; there are 5 cycle delay for a dependent load after a load
|
||||
(define_bypass 5 "cell-integer" "cell-load")
|
||||
(define_bypass 5 "cell-integer" "cell-load-ext")
|
||||
(define_bypass 5 "cell-load,cell-load-ext" "cell-load,cell-load-ext")
|
||||
|
||||
;; there is a 6 cycle delay after a fp compare until you can use the cr.
|
||||
(define_bypass 6 "cell-fpcompare" "cell-branch,cell-branchreg,cell-mfcr,cell-crlogical")
|
||||
|
||||
;; VXU float RAW
|
||||
(define_bypass 11 "cell-vecfloat" "cell-vecfloat")
|
||||
|
||||
;; VXU and FPU
|
||||
(define_bypass 6 "cell-veccomplex" "cell-vecsimple")
|
||||
;;(define_bypass 6 "cell-veccompare" "cell-branch,cell-branchreg")
|
||||
(define_bypass 3 "cell-vecfloat" "cell-veccomplex")
|
||||
; this is not correct,
|
||||
;; this is a stall in general and not dependent on result
|
||||
(define_bypass 13 "cell-vecstore" "cell-fpstore")
|
||||
; this is not correct, this can never be true, not dependent on result
|
||||
(define_bypass 7 "cell-fp" "cell-fpload")
|
||||
;; vsu1 should avoid writing to the same target register as vsu2 insn
|
||||
;; within 12 cycles.
|
||||
|
||||
;; WAW hazard
|
||||
|
||||
;; the target of VSU estimate should not be reused within 10 dispatch groups
|
||||
;; the target of VSU float should not be reused within 8 dispatch groups
|
||||
;; the target of VSU complex should not be reused within 5 dispatch groups
|
||||
;; FP LOAD should not reuse an FPU Arithmetic target within 6 dispatch groups
|
||||
|
||||
;; mtctr-bcctr/bcctrl, branch target ctr register shadow update at
|
||||
;; ex4 stage(10 cycles)
|
||||
(define_bypass 10 "cell-mtjmpr" "cell-branchreg")
|
||||
|
||||
;;Things are not simulated:
|
||||
;; update instruction, update address gpr are not simulated
|
||||
;; vrefp, vrsqrtefp have latency(14), currently simulated as 12 cycle float
|
||||
;; insns
|
||||
|
@ -139,6 +139,8 @@ struct rs6000_cpu_select rs6000_select[3] =
|
||||
{ (const char *)0, "-mtune=", 1, 0 },
|
||||
};
|
||||
|
||||
static GTY(()) bool rs6000_cell_dont_microcode;
|
||||
|
||||
/* Always emit branch hint bits. */
|
||||
static GTY(()) bool rs6000_always_hint;
|
||||
|
||||
@ -519,6 +521,22 @@ struct processor_costs ppc630_cost = {
|
||||
COSTS_N_INSNS (21), /* ddiv */
|
||||
};
|
||||
|
||||
/* Instruction costs on Cell processor.
   Each entry is written as COSTS_N_INSNS (LATENCY/2).  The division is C
   integer division, so 9/2 and 15/2 truncate to 4 and 7.  The numerators
   9, 6, 15, 10 and 74 are the same numbers used as the latencies of the
   cell-imul, cell-imul23, cell-lmul, cell-fp and cell-sdiv reservations
   in cell.md; 38 and 70 differ from the cell-idiv (32) and cell-ldiv (64)
   latencies there.
   NOTE(review): the "+2" on mulsi and muldi presumably restores the half
   insn lost to truncation, which would rely on COSTS_N_INSNS (1) being 4;
   the halving itself presumably reflects the dual-issue rate -- confirm.  */
|
||||
/* COSTS_N_INSNS (1) ~ one add. */
|
||||
static const
|
||||
struct processor_costs ppccell_cost = {
|
||||
COSTS_N_INSNS (9/2)+2, /* mulsi */
|
||||
COSTS_N_INSNS (6/2), /* mulsi_const */
|
||||
COSTS_N_INSNS (6/2), /* mulsi_const9 */
|
||||
COSTS_N_INSNS (15/2)+2, /* muldi */
|
||||
COSTS_N_INSNS (38/2), /* divsi */
|
||||
COSTS_N_INSNS (70/2), /* divdi */
|
||||
COSTS_N_INSNS (10/2), /* fp */
|
||||
COSTS_N_INSNS (10/2), /* dmul */
|
||||
COSTS_N_INSNS (74/2), /* sdiv */
|
||||
COSTS_N_INSNS (74/2), /* ddiv */
|
||||
|
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC750 and PPC7400 processors. */
|
||||
static const
|
||||
struct processor_costs ppc750_cost = {
|
||||
@ -671,6 +689,7 @@ static bool rs6000_rtx_costs (rtx, int, int, int *);
|
||||
static int rs6000_adjust_cost (rtx, rtx, rtx, int);
|
||||
static void rs6000_sched_init (FILE *, int, int);
|
||||
static bool is_microcoded_insn (rtx);
|
||||
static bool is_nonpipeline_insn (rtx);
|
||||
static bool is_cracked_insn (rtx);
|
||||
static bool is_branch_slot_insn (rtx);
|
||||
static bool is_load_insn (rtx);
|
||||
@ -692,6 +711,7 @@ static void rs6000_sched_finish (FILE *, int);
|
||||
static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int);
|
||||
static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int);
|
||||
static int rs6000_use_sched_lookahead (void);
|
||||
static int rs6000_use_sched_lookahead_guard (rtx);
|
||||
static tree rs6000_builtin_mask_for_load (void);
|
||||
static tree rs6000_builtin_mul_widen_even (tree);
|
||||
static tree rs6000_builtin_mul_widen_odd (tree);
|
||||
@ -952,6 +972,9 @@ static const char alt_reg_names[][8] =
|
||||
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
|
||||
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
|
||||
|
||||
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
|
||||
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
|
||||
|
||||
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
|
||||
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
|
||||
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
|
||||
@ -1217,6 +1240,8 @@ rs6000_override_options (const char *default_cpu)
|
||||
{"860", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
|
||||
{"970", PROCESSOR_POWER4,
|
||||
POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64},
|
||||
{"cell", PROCESSOR_CELL,
|
||||
POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64},
|
||||
{"common", PROCESSOR_COMMON, MASK_NEW_MNEMONICS},
|
||||
{"ec603e", PROCESSOR_PPC603, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
|
||||
{"G3", PROCESSOR_PPC750, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
|
||||
@ -1445,7 +1470,8 @@ rs6000_override_options (const char *default_cpu)
|
||||
|
||||
rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
|
||||
&& rs6000_cpu != PROCESSOR_POWER5
|
||||
&& rs6000_cpu != PROCESSOR_POWER6);
|
||||
&& rs6000_cpu != PROCESSOR_POWER6
|
||||
&& rs6000_cpu != PROCESSOR_CELL);
|
||||
rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
|
||||
|| rs6000_cpu == PROCESSOR_POWER5);
|
||||
rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
|
||||
@ -1519,6 +1545,16 @@ rs6000_override_options (const char *default_cpu)
|
||||
/* Set branch target alignment, if not optimizing for size. */
|
||||
if (!optimize_size)
|
||||
{
|
||||
/* Cell wants to be aligned 8byte for dual issue. */
|
||||
if (rs6000_cpu == PROCESSOR_CELL)
|
||||
{
|
||||
if (align_functions <= 0)
|
||||
align_functions = 8;
|
||||
if (align_jumps <= 0)
|
||||
align_jumps = 8;
|
||||
if (align_loops <= 0)
|
||||
align_loops = 8;
|
||||
}
|
||||
if (rs6000_align_branch_targets)
|
||||
{
|
||||
if (align_functions <= 0)
|
||||
@ -1600,6 +1636,10 @@ rs6000_override_options (const char *default_cpu)
|
||||
rs6000_cost = &ppc630_cost;
|
||||
break;
|
||||
|
||||
case PROCESSOR_CELL:
|
||||
rs6000_cost = &ppccell_cost;
|
||||
break;
|
||||
|
||||
case PROCESSOR_PPC750:
|
||||
case PROCESSOR_PPC7400:
|
||||
rs6000_cost = &ppc750_cost;
|
||||
@ -14940,7 +14980,7 @@ rs6000_emit_epilogue (int sibcall)
|
||||
rs6000_stack_t *info;
|
||||
int restoring_FPRs_inline;
|
||||
int using_load_multiple;
|
||||
int using_mfcr_multiple;
|
||||
int using_mtcr_multiple;
|
||||
int use_backchain_to_restore_sp;
|
||||
int sp_offset = 0;
|
||||
rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
|
||||
@ -14969,7 +15009,7 @@ rs6000_emit_epilogue (int sibcall)
|
||||
use_backchain_to_restore_sp = (frame_pointer_needed
|
||||
|| current_function_calls_alloca
|
||||
|| info->total_size > 32767);
|
||||
using_mfcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
|
||||
using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
|
||||
|| rs6000_cpu == PROCESSOR_PPC603
|
||||
|| rs6000_cpu == PROCESSOR_PPC750
|
||||
|| optimize_size);
|
||||
@ -15269,7 +15309,7 @@ rs6000_emit_epilogue (int sibcall)
|
||||
rtx r12_rtx = gen_rtx_REG (SImode, 12);
|
||||
int count = 0;
|
||||
|
||||
if (using_mfcr_multiple)
|
||||
if (using_mtcr_multiple)
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
if (regs_ever_live[CR0_REGNO+i] && ! call_used_regs[CR0_REGNO+i])
|
||||
@ -15277,7 +15317,7 @@ rs6000_emit_epilogue (int sibcall)
|
||||
gcc_assert (count);
|
||||
}
|
||||
|
||||
if (using_mfcr_multiple && count > 1)
|
||||
if (using_mtcr_multiple && count > 1)
|
||||
{
|
||||
rtvec p;
|
||||
int ndx;
|
||||
@ -16595,6 +16635,10 @@ rs6000_variable_issue (FILE *stream ATTRIBUTE_UNUSED,
|
||||
return cached_can_issue_more;
|
||||
}
|
||||
|
||||
/* If no reservation, but reach here */
|
||||
if (recog_memoized (insn) < 0)
|
||||
return more;
|
||||
|
||||
if (rs6000_sched_groups)
|
||||
{
|
||||
if (is_microcoded_insn (insn))
|
||||
@ -16607,6 +16651,9 @@ rs6000_variable_issue (FILE *stream ATTRIBUTE_UNUSED,
|
||||
return cached_can_issue_more;
|
||||
}
|
||||
|
||||
if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
|
||||
return 0;
|
||||
|
||||
cached_can_issue_more = more - 1;
|
||||
return cached_can_issue_more;
|
||||
}
|
||||
@ -16662,7 +16709,8 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|
||||
|| rs6000_cpu_attr == CPU_PPC7400
|
||||
|| rs6000_cpu_attr == CPU_PPC7450
|
||||
|| rs6000_cpu_attr == CPU_POWER4
|
||||
|| rs6000_cpu_attr == CPU_POWER5)
|
||||
|| rs6000_cpu_attr == CPU_POWER5
|
||||
|| rs6000_cpu_attr == CPU_CELL)
|
||||
&& recog_memoized (dep_insn)
|
||||
&& (INSN_CODE (dep_insn) >= 0))
|
||||
|
||||
@ -16912,6 +16960,9 @@ is_microcoded_insn (rtx insn)
|
||||
|| GET_CODE (PATTERN (insn)) == CLOBBER)
|
||||
return false;
|
||||
|
||||
if (rs6000_cpu_attr == CPU_CELL)
|
||||
return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
|
||||
|
||||
if (rs6000_sched_groups)
|
||||
{
|
||||
enum attr_type type = get_attr_type (insn);
|
||||
@ -17115,6 +17166,37 @@ rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
|
||||
return priority;
|
||||
}
|
||||
|
||||
/* Return true if the instruction is nonpipelined on the Cell. */
|
||||
static bool
|
||||
is_nonpipeline_insn (rtx insn)
|
||||
{
|
||||
enum attr_type type;
|
||||
if (!insn || !INSN_P (insn)
|
||||
|| GET_CODE (PATTERN (insn)) == USE
|
||||
|| GET_CODE (PATTERN (insn)) == CLOBBER)
|
||||
return false;
|
||||
|
||||
type = get_attr_type (insn);
|
||||
if (type == TYPE_IMUL
|
||||
|| type == TYPE_IMUL2
|
||||
|| type == TYPE_IMUL3
|
||||
|| type == TYPE_LMUL
|
||||
|| type == TYPE_IDIV
|
||||
|| type == TYPE_LDIV
|
||||
|| type == TYPE_SDIV
|
||||
|| type == TYPE_DDIV
|
||||
|| type == TYPE_SSQRT
|
||||
|| type == TYPE_DSQRT
|
||||
|| type == TYPE_MFCR
|
||||
|| type == TYPE_MFCRF
|
||||
|| type == TYPE_MFJMPR)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/* Return how many instructions the machine can issue per cycle. */
|
||||
|
||||
static int
|
||||
@ -17135,6 +17217,7 @@ rs6000_issue_rate (void)
|
||||
case CPU_PPC750:
|
||||
case CPU_PPC7400:
|
||||
case CPU_PPC8540:
|
||||
case CPU_CELL:
|
||||
return 2;
|
||||
case CPU_RIOS2:
|
||||
case CPU_PPC604:
|
||||
@ -17159,9 +17242,29 @@ rs6000_use_sched_lookahead (void)
|
||||
{
|
||||
if (rs6000_cpu_attr == CPU_PPC8540)
|
||||
return 4;
|
||||
if (rs6000_cpu_attr == CPU_CELL)
|
||||
return (reload_completed ? 8 : 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* We are choosing insn from the ready queue. Return nonzero if INSN can be chosen. */
|
||||
static int
|
||||
rs6000_use_sched_lookahead_guard (rtx insn)
|
||||
{
|
||||
if (rs6000_cpu_attr != CPU_CELL)
|
||||
return 1;
|
||||
|
||||
if (insn == NULL_RTX || !INSN_P (insn))
|
||||
abort ();
|
||||
|
||||
if (!reload_completed
|
||||
|| is_nonpipeline_insn (insn)
|
||||
|| is_microcoded_insn (insn))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Determine if PAT refers to memory. */
|
||||
|
||||
static bool
|
||||
@ -17337,9 +17440,25 @@ rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
|
||||
int *pn_ready ATTRIBUTE_UNUSED,
|
||||
int clock_var ATTRIBUTE_UNUSED)
|
||||
{
|
||||
int n_ready = *pn_ready;
|
||||
|
||||
if (sched_verbose)
|
||||
fprintf (dump, "// rs6000_sched_reorder :\n");
|
||||
|
||||
/* Reorder the ready list, if the second to last ready insn
|
||||
is a nonepipeline insn. */
|
||||
if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
|
||||
{
|
||||
if (is_nonpipeline_insn (ready[n_ready - 1])
|
||||
&& (recog_memoized (ready[n_ready - 2]) > 0))
|
||||
/* Simply swap first two insns. */
|
||||
{
|
||||
rtx tmp = ready[n_ready - 1];
|
||||
ready[n_ready - 1] = ready[n_ready - 2];
|
||||
ready[n_ready - 2] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
if (rs6000_cpu == PROCESSOR_POWER6)
|
||||
load_store_pendulum = 0;
|
||||
|
||||
|
@ -68,6 +68,7 @@
|
||||
%{mno-power: %{!mpowerpc*: -mcom}} \
|
||||
%{!mno-power: %{!mpower*: %(asm_default)}}} \
|
||||
%{mcpu=common: -mcom} \
|
||||
%{mcpu=cell: -mcell} \
|
||||
%{mcpu=power: -mpwr} \
|
||||
%{mcpu=power2: -mpwrx} \
|
||||
%{mcpu=power3: -mppc64} \
|
||||
@ -222,7 +223,8 @@ enum processor_type
|
||||
PROCESSOR_PPC8540,
|
||||
PROCESSOR_POWER4,
|
||||
PROCESSOR_POWER5,
|
||||
PROCESSOR_POWER6
|
||||
PROCESSOR_POWER6,
|
||||
PROCESSOR_CELL
|
||||
};
|
||||
|
||||
extern enum processor_type rs6000_cpu;
|
||||
|
@ -106,9 +106,26 @@
|
||||
;; Processor type -- this attribute must exactly match the processor_type
|
||||
;; enumeration in rs6000.h.
|
||||
|
||||
(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,power4,power5,power6"
|
||||
(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,power4,power5,power6,cell"
|
||||
(const (symbol_ref "rs6000_cpu_attr")))
|
||||
|
||||
|
||||
;; Whether this instruction is microcoded on the CELL processor.
;;   conditional - default for types load, store, fpload and fpstore
;;   always      - default for types load_ext, compare and delayed_compare
;;   not         - default for every other type
(define_attr "cell_micro" "not,conditional,always"
  (cond [(eq_attr "type" "load,store,fpload,fpstore")
	 (const_string "conditional")
	 (eq_attr "type" "load_ext,compare,delayed_compare")
	 (const_string "always")]
	(const_string "not")))
|
||||
|
||||
|
||||
(automata_option "ndfa")
|
||||
|
||||
(include "rios1.md")
|
||||
@ -125,6 +142,7 @@
|
||||
(include "power4.md")
|
||||
(include "power5.md")
|
||||
(include "power6.md")
|
||||
(include "cell.md")
|
||||
|
||||
(include "predicates.md")
|
||||
(include "constraints.md")
|
||||
|
Loading…
x
Reference in New Issue
Block a user