mirror of
git://gcc.gnu.org/git/gcc.git
synced 2024-12-24 23:09:50 +08:00
pa-protos.h (hppa_fpstore_bypass_p): Declare.
* pa-protos.h (hppa_fpstore_bypass_p): Declare. * pa.c (pa_adjust_cost): Remove all true dependency cost adjustments. Also remove support for non-DFA scheduling. * pa.md (700, 7100, 7100lc, 7200, 7300): Use bypass mechanism to adjust true dependency costs. Update various comments. (7100lc, 7200, 7300 scheduling): Simplify by combining the FP ALU & MPY units into a single unit. From-SVN: r53227
This commit is contained in:
parent
a17a104c21
commit
5d50fab314
@ -1,3 +1,13 @@
|
||||
2002-05-06 Jeff Law <law@redhat.com>
|
||||
|
||||
* pa-protos.h (hppa_fpstore_bypass_p): Declare.
|
||||
* pa.c (pa_adjust_cost): Remove all true dependency cost
|
||||
adjustments. Also remove support for non-DFA scheduling.
|
||||
* pa.md (700, 7100, 7100lc, 7200, 7300): Use bypass mechanism
|
||||
to adjust true dependency costs. Update various comments.
|
||||
(7100lc, 7200, 7300 scheduling): Simplify by combining the
|
||||
FP ALU & MPY units into a single unit.
|
||||
|
||||
2002-05-06 Catherine Moore <clm@redhat.com>
|
||||
|
||||
* config/v850/v850.c (compute_register_save_size): Make sure
|
||||
|
@ -103,6 +103,7 @@ extern int is_function_label_plus_const PARAMS ((rtx));
|
||||
extern int jump_in_call_delay PARAMS ((rtx));
|
||||
extern enum reg_class secondary_reload_class PARAMS ((enum reg_class,
|
||||
enum machine_mode, rtx));
|
||||
extern int hppa_fpstore_bypass_p PARAMS ((rtx, rtx));
|
||||
|
||||
/* Declare functions defined in pa.c and used in templates. */
|
||||
|
||||
|
@ -60,6 +60,33 @@ hppa_use_dfa_pipeline_interface ()
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Return nonzero if there is a bypass for the output of
|
||||
OUT_INSN and the fp store IN_INSN. */
|
||||
int
|
||||
hppa_fpstore_bypass_p (out_insn, in_insn)
|
||||
rtx out_insn, in_insn;
|
||||
{
|
||||
enum machine_mode store_mode;
|
||||
enum machine_mode other_mode;
|
||||
rtx set;
|
||||
|
||||
if (recog_memoized (in_insn) < 0
|
||||
|| get_attr_type (in_insn) != TYPE_FPSTORE
|
||||
|| recog_memoized (out_insn) < 0)
|
||||
return 0;
|
||||
|
||||
store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
|
||||
|
||||
set = single_set (out_insn);
|
||||
if (!set)
|
||||
return 0;
|
||||
|
||||
other_mode = GET_MODE (SET_SRC (set));
|
||||
|
||||
return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
|
||||
}
|
||||
|
||||
|
||||
#ifndef DO_FRAME_NOTES
|
||||
#ifdef INCOMING_RETURN_ADDR_RTX
|
||||
#define DO_FRAME_NOTES 1
|
||||
@ -3907,8 +3934,9 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
||||
{
|
||||
enum attr_type attr_type;
|
||||
|
||||
/* Don't adjust costs for a pa8000 chip. */
|
||||
if (pa_cpu >= PROCESSOR_8000)
|
||||
/* Don't adjust costs for a pa8000 chip, also do not adjust any
|
||||
true dependencies as they are described with bypasses now. */
|
||||
if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
|
||||
return cost;
|
||||
|
||||
if (! recog_memoized (insn))
|
||||
@ -3916,65 +3944,7 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
||||
|
||||
attr_type = get_attr_type (insn);
|
||||
|
||||
if (REG_NOTE_KIND (link) == 0)
|
||||
{
|
||||
/* Data dependency; DEP_INSN writes a register that INSN reads some
|
||||
cycles later. */
|
||||
|
||||
if (attr_type == TYPE_FPSTORE)
|
||||
{
|
||||
rtx pat = PATTERN (insn);
|
||||
rtx dep_pat = PATTERN (dep_insn);
|
||||
if (GET_CODE (pat) == PARALLEL)
|
||||
{
|
||||
/* This happens for the fstXs,mb patterns. */
|
||||
pat = XVECEXP (pat, 0, 0);
|
||||
}
|
||||
if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
|
||||
/* If this happens, we have to extend this to schedule
|
||||
optimally. Return 0 for now. */
|
||||
return 0;
|
||||
|
||||
if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
|
||||
{
|
||||
if (! recog_memoized (dep_insn))
|
||||
return 0;
|
||||
/* DEP_INSN is writing its result to the register
|
||||
being stored in the fpstore INSN. */
|
||||
switch (get_attr_type (dep_insn))
|
||||
{
|
||||
case TYPE_FPLOAD:
|
||||
/* This cost 3 cycles, not 2 as the md says for the
|
||||
700 and 7100, 7100lc, 7200 and 7300. */
|
||||
return cost + 1;
|
||||
|
||||
case TYPE_FPALU:
|
||||
case TYPE_FPMULSGL:
|
||||
case TYPE_FPMULDBL:
|
||||
case TYPE_FPDIVSGL:
|
||||
case TYPE_FPDIVDBL:
|
||||
case TYPE_FPSQRTSGL:
|
||||
case TYPE_FPSQRTDBL:
|
||||
/* In these important cases, we save one cycle compared to
|
||||
when flop instruction feed each other. */
|
||||
return cost - 1;
|
||||
|
||||
default:
|
||||
return cost;
|
||||
}
|
||||
}
|
||||
|
||||
/* A flop-flop true dependency where the sizes of the operands
|
||||
carrying the dependency are different causes an additional
|
||||
cycle stall on the 7100lc, 7200, and 7300. Similarly for
|
||||
a fpload-flop true dependency. */
|
||||
}
|
||||
|
||||
/* For other data dependencies, the default cost specified in the
|
||||
md is correct. */
|
||||
return cost;
|
||||
}
|
||||
else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
|
||||
if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
|
||||
{
|
||||
/* Anti dependency; DEP_INSN reads a register that INSN writes some
|
||||
cycles later. */
|
||||
@ -4010,10 +3980,7 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
||||
preceding arithmetic operation has finished if
|
||||
the target of the fpload is any of the sources
|
||||
(or destination) of the arithmetic operation. */
|
||||
if (hppa_use_dfa_pipeline_interface ())
|
||||
return insn_default_latency (dep_insn) - 1;
|
||||
else
|
||||
return cost - 1;
|
||||
return insn_default_latency (dep_insn) - 1;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
@ -4048,10 +4015,7 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
||||
preceding divide or sqrt operation has finished if
|
||||
the target of the ALU flop is any of the sources
|
||||
(or destination) of the divide or sqrt operation. */
|
||||
if (hppa_use_dfa_pipeline_interface ())
|
||||
return insn_default_latency (dep_insn) - 2;
|
||||
else
|
||||
return cost - 2;
|
||||
return insn_default_latency (dep_insn) - 2;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
@ -4101,10 +4065,7 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
||||
Exception: For PA7100LC, PA7200 and PA7300, the cost
|
||||
is 3 cycles, unless they bundle together. We also
|
||||
pay the penalty if the second insn is a fpload. */
|
||||
if (hppa_use_dfa_pipeline_interface ())
|
||||
return insn_default_latency (dep_insn) - 1;
|
||||
else
|
||||
return cost - 1;
|
||||
return insn_default_latency (dep_insn) - 1;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
@ -4139,10 +4100,7 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
||||
preceding divide or sqrt operation has finished if
|
||||
the target of the ALU flop is also the target of
|
||||
the divide or sqrt operation. */
|
||||
if (hppa_use_dfa_pipeline_interface ())
|
||||
return insn_default_latency (dep_insn) - 2;
|
||||
else
|
||||
return cost - 2;
|
||||
return insn_default_latency (dep_insn) - 2;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
|
@ -206,20 +206,41 @@
|
||||
"fpmpy_700*18")
|
||||
|
||||
(define_insn_reservation "W7" 2
|
||||
(and (eq_attr "type" "load,fpload")
|
||||
(and (eq_attr "type" "load")
|
||||
(eq_attr "cpu" "700"))
|
||||
"mem_700")
|
||||
|
||||
(define_insn_reservation "W8" 3
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(define_insn_reservation "W8" 2
|
||||
(and (eq_attr "type" "fpload")
|
||||
(eq_attr "cpu" "700"))
|
||||
"mem_700")
|
||||
|
||||
(define_insn_reservation "W9" 3
|
||||
(and (eq_attr "type" "store")
|
||||
(eq_attr "cpu" "700"))
|
||||
"mem_700*3")
|
||||
|
||||
(define_insn_reservation "W9" 1
|
||||
(define_insn_reservation "W10" 3
|
||||
(and (eq_attr "type" "fpstore")
|
||||
(eq_attr "cpu" "700"))
|
||||
"mem_700*3")
|
||||
|
||||
(define_insn_reservation "W11" 1
|
||||
(and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,load,fpload,store,fpstore")
|
||||
(eq_attr "cpu" "700"))
|
||||
"dummy_700")
|
||||
|
||||
;; We have a bypass for all computations in the FP unit which feed an
|
||||
;; FP store as long as the sizes are the same.
|
||||
(define_bypass 2 "W1,W2" "W10" "hppa_fpstore_bypass_p")
|
||||
(define_bypass 9 "W3" "W10" "hppa_fpstore_bypass_p")
|
||||
(define_bypass 11 "W4" "W10" "hppa_fpstore_bypass_p")
|
||||
(define_bypass 13 "W5" "W10" "hppa_fpstore_bypass_p")
|
||||
(define_bypass 17 "W6" "W10" "hppa_fpstore_bypass_p")
|
||||
|
||||
;; We have an "anti-bypass" for FP loads which feed an FP store.
|
||||
(define_bypass 4 "W8" "W10" "hppa_fpstore_bypass_p")
|
||||
|
||||
;; Function units for the 7100 and 7150. The 7100/7150 can dual-issue
|
||||
;; floating point computations with non-floating point computations (fp loads
|
||||
;; and stores are not fp computations).
|
||||
@ -228,8 +249,12 @@
|
||||
;; take two cycles, during which no Dcache operations should be scheduled.
|
||||
;; Any special cases are handled in pa_adjust_cost. The 7100, 7150 and 7100LC
|
||||
;; all have the same memory characteristics if one disregards cache misses.
|
||||
|
||||
;;
|
||||
;; The 7100/7150 has three floating-point units: ALU, MUL, and DIV.
|
||||
;; There's no value in modeling the ALU and MUL separately though
|
||||
;; since there can never be a functional unit conflict given the
|
||||
;; latency and issue rates for those units.
|
||||
;;
|
||||
;; Timings:
|
||||
;; Instruction Time Unit Minimum Distance (unit contention)
|
||||
;; fcpy 2 ALU 1
|
||||
@ -247,11 +272,6 @@
|
||||
;; fdiv,dbl 15 DIV 15
|
||||
;; fsqrt,sgl 8 DIV 8
|
||||
;; fsqrt,dbl 15 DIV 15
|
||||
;;
|
||||
;; We don't really model the FP ALU/MPY units properly (they are
|
||||
;; distinct subunits in the FP unit). However, there can never be
|
||||
;; a functional unit conflict given the latency and issue rates
|
||||
;; for those units.
|
||||
|
||||
(define_automaton "pa7100")
|
||||
(define_cpu_unit "i_7100, f_7100,fpmac_7100,fpdivsqrt_7100,mem_7100" "pa7100")
|
||||
@ -272,21 +292,45 @@
|
||||
"f_7100+fpdivsqrt_7100,fpdivsqrt_7100*14")
|
||||
|
||||
(define_insn_reservation "X3" 2
|
||||
(and (eq_attr "type" "load,fpload")
|
||||
(and (eq_attr "type" "load")
|
||||
(eq_attr "cpu" "7100"))
|
||||
"i_7100+mem_7100")
|
||||
|
||||
(define_insn_reservation "X4" 2
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(and (eq_attr "type" "fpload")
|
||||
(eq_attr "cpu" "7100"))
|
||||
"i_7100+mem_7100")
|
||||
|
||||
(define_insn_reservation "X5" 2
|
||||
(and (eq_attr "type" "store")
|
||||
(eq_attr "cpu" "7100"))
|
||||
"i_7100+mem_7100,mem_7100")
|
||||
|
||||
(define_insn_reservation "X5" 1
|
||||
(define_insn_reservation "X6" 2
|
||||
(and (eq_attr "type" "fpstore")
|
||||
(eq_attr "cpu" "7100"))
|
||||
"i_7100+mem_7100,mem_7100")
|
||||
|
||||
(define_insn_reservation "X7" 1
|
||||
(and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore")
|
||||
(eq_attr "cpu" "7100"))
|
||||
"i_7100")
|
||||
|
||||
;; We have a bypass for all computations in the FP unit which feed an
|
||||
;; FP store as long as the sizes are the same.
|
||||
(define_bypass 1 "X0" "X6" "hppa_fpstore_bypass_p")
|
||||
(define_bypass 7 "X1" "X6" "hppa_fpstore_bypass_p")
|
||||
(define_bypass 14 "X2" "X6" "hppa_fpstore_bypass_p")
|
||||
|
||||
;; We have an "anti-bypass" for FP loads which feed an FP store.
|
||||
(define_bypass 3 "X4" "X6" "hppa_fpstore_bypass_p")
|
||||
|
||||
;; The 7100LC has three floating-point units: ALU, MUL, and DIV.
|
||||
;; There's no value in modeling the ALU and MUL separately though
|
||||
;; since there can never be a functional unit conflict that
|
||||
;; can be avoided given the latency, issue rates and mandatory
|
||||
;; one cycle cpu-wide lock for a double precision fp multiply.
|
||||
;;
|
||||
;; Timings:
|
||||
;; Instruction Time Unit Minimum Distance (unit contention)
|
||||
;; fcpy 2 ALU 1
|
||||
@ -321,29 +365,25 @@
|
||||
;;
|
||||
;; load-load pairs
|
||||
;; store-store pairs
|
||||
;; fmpyadd,dbl
|
||||
;; fmpysub,dbl
|
||||
;; other issue modeling
|
||||
|
||||
(define_automaton "pa7100lc")
|
||||
(define_cpu_unit "i0_7100lc, i1_7100lc, f_7100lc" "pa7100lc")
|
||||
(define_cpu_unit "fpalu_7100lc,fpmul_7100lc" "pa7100lc")
|
||||
(define_cpu_unit "fpmac_7100lc" "pa7100lc")
|
||||
(define_cpu_unit "mem_7100lc" "pa7100lc")
|
||||
|
||||
(define_insn_reservation "Y0" 2
|
||||
(and (eq_attr "type" "fpcc,fpalu")
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"f_7100lc,fpalu_7100lc")
|
||||
|
||||
;; Double precision multiplies lock the entire CPU for one
|
||||
;; cycle. There is no way to avoid this lock and trying to
|
||||
;; schedule around the lock is pointless and thus there is no
|
||||
;; value in trying to model this lock. Not modeling the lock
|
||||
;; allows for a smaller DFA and may reduce register pressure.
|
||||
(define_insn_reservation "Y1" 2
|
||||
(and (eq_attr "type" "fpmulsgl,fpmuldbl")
|
||||
;; value in trying to model this lock.
|
||||
;;
|
||||
;; Not modeling the lock allows us to treat fp multiplies just
|
||||
;; like any other FP alu instruction. It allows for a smaller
|
||||
;; DFA and may reduce register pressure.
|
||||
(define_insn_reservation "Y0" 2
|
||||
(and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"f_7100lc,fpmul_7100lc")
|
||||
"f_7100lc,fpmac_7100lc")
|
||||
|
||||
;; fp division and sqrt instructions lock the entire CPU for
|
||||
;; 7 cycles (single precision) or 14 cycles (double precision).
|
||||
@ -351,43 +391,66 @@
|
||||
;; around the lock is pointless and thus there is no value in
|
||||
;; trying to model this lock. Not modeling the lock allows
|
||||
;; for a smaller DFA and may reduce register pressure.
|
||||
(define_insn_reservation "Y2" 1
|
||||
(define_insn_reservation "Y1" 1
|
||||
(and (eq_attr "type" "fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"f_7100lc")
|
||||
|
||||
(define_insn_reservation "Y2" 2
|
||||
(and (eq_attr "type" "load")
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"i1_7100lc+mem_7100lc")
|
||||
|
||||
(define_insn_reservation "Y3" 2
|
||||
(and (eq_attr "type" "load,fpload")
|
||||
(and (eq_attr "type" "fpload")
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"i1_7100lc+mem_7100lc")
|
||||
|
||||
(define_insn_reservation "Y4" 2
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(and (eq_attr "type" "store")
|
||||
(eq_attr "cpu" "7100LC"))
|
||||
"i1_7100lc+mem_7100lc,mem_7100lc")
|
||||
|
||||
(define_insn_reservation "Y5" 1
|
||||
(define_insn_reservation "Y5" 2
|
||||
(and (eq_attr "type" "fpstore")
|
||||
(eq_attr "cpu" "7100LC"))
|
||||
"i1_7100lc+mem_7100lc,mem_7100lc")
|
||||
|
||||
(define_insn_reservation "Y6" 1
|
||||
(and (eq_attr "type" "shift,nullshift")
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"i1_7100lc")
|
||||
|
||||
(define_insn_reservation "Y6" 1
|
||||
(define_insn_reservation "Y7" 1
|
||||
(and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore,shift,nullshift")
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"(i0_7100lc|i1_7100lc)")
|
||||
|
||||
;; The 7200 has a store-load penalty
|
||||
(define_insn_reservation "Y7" 2
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(define_insn_reservation "Y8" 2
|
||||
(and (eq_attr "type" "store")
|
||||
(eq_attr "cpu" "7200"))
|
||||
"i1_7100lc,mem_7100lc")
|
||||
|
||||
(define_insn_reservation "Y9" 2
|
||||
(and (eq_attr "type" "fpstore")
|
||||
(eq_attr "cpu" "7200"))
|
||||
"i1_7100lc,mem_7100lc")
|
||||
|
||||
;; The 7300 has no penalty for store-store or store-load
|
||||
(define_insn_reservation "Y8" 2
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(define_insn_reservation "Y10" 2
|
||||
(and (eq_attr "type" "store")
|
||||
(eq_attr "cpu" "7300"))
|
||||
"i1_7100lc")
|
||||
|
||||
(define_insn_reservation "Y11" 2
|
||||
(and (eq_attr "type" "fpstore")
|
||||
(eq_attr "cpu" "7300"))
|
||||
"i1_7100lc")
|
||||
|
||||
;; We have an "anti-bypass" for FP loads which feed an FP store.
|
||||
(define_bypass 3 "Y3" "Y5,Y9,Y11" "hppa_fpstore_bypass_p")
|
||||
|
||||
;; Scheduling for the PA8000 is somewhat different than scheduling for a
|
||||
;; traditional architecture.
|
||||
;;
|
||||
|
Loading…
Reference in New Issue
Block a user