diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 68e89f2ad503..4ec4c5bd5cf4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,4 +1,16 @@ -Wed Jul 17 14:04:10 2002 J"orn Rennecke +Wed Jul 17 16:28:53 2002 J"orn Rennecke + + * config/sh/lib1funcs.asm (init_trampoline): New entry point. + * sh-protos.h (sh_initialize_trampoline): Declare. + * sh.c (sh_initialize_trampoline): New function. + * sh.h (TRAMPOLINE_SIZE): Only 24 for TARGET_SHMEDIA32. + (TRAMPOLINE_ALIGNMENT): Need cache-line alignment for TARGET_SHMEDIA. + (INITIALIZE_TRAMPOLINE): Call sh_initialize_trampoline. + (TRAMPOLINE_ADJUST_ADDRESS): Not needed for SHcompact. + * sh.md (initialize_trampoline, double_shori): New patterns. + (initialize_trampoline_compact): Likewise. + (shmedia32_initialize_trampoline_big): Remove. + (shmedia32_initialize_trampoline_little): Likewise. * sh-protos.h (binary_float_operator): Remove declaration. (sh_expand_unop_v2sf, sh_expand_binop_v2sf): Declare. diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm index 1021c9ba3b56..d7071b123214 100644 --- a/gcc/config/sh/lib1funcs.asm +++ b/gcc/config/sh/lib1funcs.asm @@ -1821,6 +1821,22 @@ LOCAL(set_fpscr_L1): .mode SHmedia .section .text..SHmedia32,"ax" .align 2 + .global GLOBAL(init_trampoline) +GLOBAL(init_trampoline): + st.l r0,8,r2 +#ifdef __LITTLE_ENDIAN__ + movi 9,r20 + shori 0x402b,r20 + shori 0xd101,r20 + shori 0xd002,r20 +#else + movi 0xffffffffffffd002,r20 + shori 0xd101,r20 + shori 0x402b,r20 + shori 9,r20 +#endif + st.q r0,0,r20 + st.l r0,12,r3 .global GLOBAL(ic_invalidate) GLOBAL(ic_invalidate): ocbwb r0,0 diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index 432ab63367e0..9adfb1e16e14 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -125,6 +125,7 @@ extern int fldi_ok PARAMS ((void)); extern int sh_pr_n_sets PARAMS ((void)); extern int sh_hard_regno_rename_ok PARAMS ((unsigned int, unsigned int)); extern int sh_cfun_interrupt_handler_p (void); +extern 
void sh_initialize_trampoline (rtx, rtx, rtx); #ifdef HARD_CONST extern void fpscr_set_from_mem PARAMS ((int, HARD_REG_SET)); diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 315cdec5484d..26026b4c93c9 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -7113,6 +7113,177 @@ sh_strip_name_encoding (str) return str; } + +/* + On the SH1..SH4, the trampoline looks like + 2 0002 D202 mov.l l2,r2 + 1 0000 D301 mov.l l1,r3 + 3 0004 422B jmp @r2 + 4 0006 0009 nop + 5 0008 00000000 l1: .long area + 6 000c 00000000 l2: .long function + + SH5 (compact) uses r1 instead of r3 for the static chain. */ + + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. */ + +void +sh_initialize_trampoline (tramp, fnaddr, cxt) + rtx tramp, fnaddr, cxt; +{ + if (TARGET_SHMEDIA64) + { + rtx tramp_templ; + int fixed_len; + + rtx movi1 = GEN_INT (0xcc000010); + rtx shori1 = GEN_INT (0xc8000010); + rtx src, dst; + + /* The following trampoline works within a +- 128 KB range for cxt: + ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0; + shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0 + gettr tr1,r1; blink tr0,r63 */ + /* Address rounding makes it hard to compute the exact bounds of the + offset for this trampoline, but we have a rather generous offset + range, so frame_offset should do fine as an upper bound. */ + if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000) + { + /* ??? could optimize this trampoline initialization + by writing DImode words with two insns each. 
*/ + rtx mask = force_reg (DImode, GEN_INT (0x3fffc00)); + rtx insn = gen_rtx_MINUS (DImode, cxt, tramp); + insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2)); + insn = gen_rtx_AND (DImode, insn, mask); + /* Or in ptb/u .,tr1 pattern */ + insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode)); + insn = force_operand (insn, NULL_RTX); + insn = gen_lowpart (SImode, insn); + emit_move_insn (gen_rtx_MEM (SImode, tramp), insn); + insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38)); + insn = gen_rtx_AND (DImode, insn, mask); + insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX); + insn = gen_lowpart (SImode, insn); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn); + insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22)); + insn = gen_rtx_AND (DImode, insn, mask); + insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX); + insn = gen_lowpart (SImode, insn); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn); + insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6)); + insn = gen_rtx_AND (DImode, insn, mask); + insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX); + insn = gen_lowpart (SImode, insn); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)), + insn); + insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10)); + insn = gen_rtx_AND (DImode, insn, mask); + insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX); + insn = gen_lowpart (SImode, insn); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)), + insn); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)), + GEN_INT (0x6bf10600)); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)), + GEN_INT (0x4415fc10)); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)), + GEN_INT (0x4401fff0)); + emit_insn (gen_ic_invalidate_line (tramp)); + return; + } + tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline"); + fixed_len = 
TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode); + + tramp_templ = gen_datalabel_ref (tramp_templ); + dst = gen_rtx_MEM (BLKmode, tramp); + src = gen_rtx_MEM (BLKmode, tramp_templ); + set_mem_align (dst, 256); + set_mem_align (src, 64); + emit_block_move (dst, src, GEN_INT (fixed_len)); + + emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)), + fnaddr); + emit_move_insn (gen_rtx_MEM (Pmode, + plus_constant (tramp, + fixed_len + + GET_MODE_SIZE (Pmode))), + cxt); + emit_insn (gen_ic_invalidate_line (tramp)); + return; + } + else if (TARGET_SHMEDIA) + { + /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0 + movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */ + rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode); + rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode); + /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated, + rotated 10 right, and higher 16 bits of every 32 selected. */ + rtx movishori + = force_reg (V2HImode, (simplify_gen_subreg + (V2HImode, GEN_INT (0x4330432), SImode, 0))); + rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600)); + rtx blink = force_reg (DImode, GEN_INT (0x4401fff0)); + + tramp = force_reg (Pmode, tramp); + fnaddr = force_reg (SImode, fnaddr); + cxt = force_reg (SImode, cxt); + emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0), + gen_rtx_SUBREG (V2HImode, fnaddr, 0), + movishori)); + emit_insn (gen_rotldi3_mextr (quad0, quad0, + GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56))); + emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2))); + emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0); + emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0), + gen_rtx_SUBREG (V2HImode, cxt, 0), + movishori)); + emit_insn (gen_rotldi3_mextr (cxtload, cxtload, + GEN_INT (TARGET_LITTLE_ENDIAN ?
24 : 56))); + emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2))); + if (TARGET_LITTLE_ENDIAN) + { + emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload)); + emit_insn (gen_mextr4 (quad2, cxtload, blink)); + } + else + { + emit_insn (gen_mextr4 (quad1, cxtload, ptabs)); + emit_insn (gen_mshflo_l_di (quad2, blink, cxtload)); + } + emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1); + emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2); + emit_insn (gen_ic_invalidate_line (tramp)); + return; + } + else if (TARGET_SHCOMPACT) + { + emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr)); + return; + } + emit_move_insn (gen_rtx_MEM (SImode, tramp), + gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301, + SImode)); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), + gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009, + SImode)); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), + cxt); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)), + fnaddr); + if (TARGET_HARVARD) + { + if (TARGET_USERMODE) + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"), + 0, VOIDmode, 1, tramp, SImode); + else + emit_insn (gen_ic_invalidate_line (tramp)); + } +} + /* Machine specific built-in functions. */ diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index c37b10c207e3..b8ab2cace624 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -2082,64 +2082,25 @@ while (0) 6 000c 00000000 l2: .long function */ /* Length in units of the trampoline for entering a nested function. */ -#define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 32 : 16) +#define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : 16) /* Alignment required for a trampoline in bits . */ #define TRAMPOLINE_ALIGNMENT \ - ((CACHE_LOG < 3 || (TARGET_SMALLCODE && ! TARGET_HARVARD)) ? 32 : 64) + ((CACHE_LOG < 3 || (TARGET_SMALLCODE && ! TARGET_HARVARD)) ? 
32 \ + : TARGET_SHMEDIA ? 256 : 64) /* Emit RTL insns to initialize the variable parts of a trampoline. FNADDR is an RTX for the address of the function's pure code. CXT is an RTX for the static chain value for the function. */ -#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) do \ -{ \ - if (TARGET_SH5) \ - { \ - rtx tramp_templ = gen_rtx_SYMBOL_REF (Pmode, \ - "__GCC_nested_trampoline"); \ - int fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode); \ - \ - tramp_templ = gen_datalabel_ref (tramp_templ); \ - emit_block_move (gen_rtx_MEM (BLKmode, (TRAMP)), \ - gen_rtx_MEM (BLKmode, tramp_templ), \ - GEN_INT (fixed_len)); \ - emit_move_insn (gen_rtx_MEM (Pmode, plus_constant ((TRAMP), \ - fixed_len)), \ - (FNADDR)); \ - emit_move_insn (gen_rtx_MEM (Pmode, \ - plus_constant ((TRAMP), \ - fixed_len \ - + GET_MODE_SIZE (Pmode))), \ - (CXT)); \ - emit_insn (gen_ic_invalidate_line (TRAMP)); \ - break; \ - } \ - emit_move_insn (gen_rtx_MEM (SImode, (TRAMP)), \ - GEN_INT (trunc_int_for_mode \ - (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,\ - SImode))); \ - emit_move_insn (gen_rtx_MEM (SImode, plus_constant ((TRAMP), 4)), \ - GEN_INT (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009));\ - emit_move_insn (gen_rtx_MEM (SImode, plus_constant ((TRAMP), 8)), \ - (CXT)); \ - emit_move_insn (gen_rtx_MEM (SImode, plus_constant ((TRAMP), 12)), \ - (FNADDR)); \ - if (TARGET_HARVARD) \ - { \ - if (TARGET_USERMODE) \ - emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"),\ - 0, VOIDmode, 1, (TRAMP), SImode); \ - else \ - emit_insn (gen_ic_invalidate_line (TRAMP)); \ - } \ -} while (0) +#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \ + sh_initialize_trampoline ((TRAMP), (FNADDR), (CXT)) /* On SH5, trampolines are SHmedia code, so add 1 to the address. 
*/ #define TRAMPOLINE_ADJUST_ADDRESS(TRAMP) do \ { \ - if (TARGET_SH5) \ + if (TARGET_SHMEDIA) \ (TRAMP) = expand_simple_binop (Pmode, PLUS, (TRAMP), GEN_INT (1), \ gen_reg_rtx (Pmode), 0, \ OPTAB_LIB_WIDEN); \ diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index c3ac6e1b4148..821562536e8d 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -3667,6 +3667,8 @@ [(set_attr "length" "8") (set_attr "insn_class" "cwb")]) +;; ??? could make arg 0 an offsettable memory operand to allow to save +;; an add in the code that calculates the address. (define_insn "ic_invalidate_line_media" [(unspec_volatile [(match_operand 0 "register_operand" "r")] UNSPEC_ICACHE)] @@ -3685,6 +3687,37 @@ [(set_attr "type" "sfunc") (set_attr "needs_delay_slot" "yes")]) +(define_expand "initialize_trampoline" + [(match_operand:SI 0 "" "") + (match_operand:SI 1 "" "") + (match_operand:SI 2 "" "")] + "TARGET_SHCOMPACT" + " +{ + rtx sfun, tramp; + + sfun = force_reg (Pmode, gen_rtx_SYMBOL_REF (Pmode, \"__init_trampoline\")); + tramp = gen_rtx_REG (SImode, R0_REG); + emit_move_insn (tramp, operands[0]); + emit_move_insn (gen_rtx_REG (SImode, R2_REG), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, R3_REG), operands[2]); + + emit_insn (gen_initialize_trampoline_compact (tramp, sfun)); + DONE; +}") + +(define_insn "initialize_trampoline_compact" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "z") + (match_operand:SI 1 "register_operand" "r") + (reg:SI R2_REG) (reg:SI R3_REG)] + UNSPEC_INIT_TRAMP) + + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + (define_insn "movqi_i" [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,r,l") (match_operand:QI 1 "general_movsrc_operand" "ri,m,r,t,l,r"))] @@ -7198,62 +7231,6 @@ "jsr @r0%#" [(set_attr "needs_delay_slot" "yes")]) -;; ??? 
could make arg 0 an offsettable memory operand - and do likewise -;; for cache invalidation - to allow to save an add in the code that -;; calculates the address. -(define_insn "shmedia32_initialize_trampoline_big" - [(set (mem:BLK (match_operand:SI 0 "arith_reg_operand" "r")) - (unspec [(match_operand:SI 1 "arith_reg_operand" "r") - (match_operand:SI 2 "arith_reg_operand" "r")] - UNSPEC_INIT_TRAMP)) - (clobber (match_scratch:SI 3 "=&r")) - (clobber (match_scratch:SI 4 "=&r"))] - "TARGET_SHMEDIA32 && ! TARGET_LITTLE_ENDIAN" - "movi 0x433,%3 - shori 0x432,%3 - mshflo.w %1,%3,%4 - mextr7 %4,%4,%4 - shlli %4,2,%4 - st.q %0,0,%4 - mshflo.w %2,%3,%4 - shlli %4,10,%4 - addi %4,0x10,%4 - movi 0x6bf1,%3 - shori 0x0600,%3 - mextr4 %4,%3,%3 - st.q %0,8,%3 - shori 0x4401,%4 - shori 0xfff0,%4 - st.q %0,16,%4" - [(set_attr "length" "64")]) - -(define_insn "shmedia32_initialize_trampoline_little" - [(set (mem:BLK (match_operand:SI 0 "arith_reg_operand" "r")) - (unspec [(match_operand:SI 1 "arith_reg_operand" "r") - (match_operand:SI 2 "arith_reg_operand" "r")] - UNSPEC_INIT_TRAMP)) - (clobber (match_scratch:SI 3 "=&r")) - (clobber (match_scratch:SI 4 "=&r"))] - "TARGET_SHMEDIA32 && TARGET_LITTLE_ENDIAN" - "movi 0x433,%3 - shori 0x432,%3 - mshflo.w %1,%3,%4 - mextr3 %4,%4,%4 - shlli %4,2,%4 - st.q %0,0,%4 - mshflo.w %2,%3,%4 - shlli %4,10,%4 - addi %4,0x10,%4 - movi 0x6bf1,%3 - shori 0x0600,%3 - shori 0x4401,%3 - shori 0xfff0,%3 - st.l %0,16,%r4 - st.l %0,20,%r3 - mshfhi.l %3,%4,%4 - st.q %0,8,%4" - [(set_attr "length" "68")]) - (define_expand "prologue" [(const_int 0)] "" @@ -10263,6 +10240,29 @@ "mshflo.l %N2, %N1, %0" [(set_attr "type" "arith_media")]) +;; Combiner pattern for trampoline initialization. 
+(define_insn_and_split "*double_shori" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 32)) + (match_operand:DI 2 "const_int_operand" "n")))] + "TARGET_SHMEDIA + && INTVAL (operands[2]) == trunc_int_for_mode (INTVAL (operands[2]), SImode)" + "#" + "rtx_equal_p (operands[0], operands[1])" + [(const_int 0)] + " +{ + HOST_WIDE_INT v = INTVAL (operands[2]); + + emit_insn (gen_shori_media (operands[0], operands[0], + gen_int_mode (INTVAL (operands[2]) >> 16, HImode))); + emit_insn (gen_shori_media (operands[0], operands[0], + gen_int_mode (v, HImode))); + DONE; +}") + + (define_insn "*mshflo_l_di_x" [(set (match_operand:DI 0 "arith_reg_dest" "=r") (ior:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_or_0_operand"