mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-03 07:10:28 +08:00
Introduce sh4a support.
gcc/ChangeLog: Introduce sh4a support. * config.gcc: Handle sh4a multilibs and cpu selection. * config/sh/sh.h: Likewise. Handle sh4a command line flags. * config/sh/t-mlib-sh4a: New. * config/sh/t-mlib-sh4al: New. * config/sh/t-mlib-sh4a-nofpu: New. * config/sh/t-mlib-sh4a-single: New. * config/sh/t-mlib-sh4a-single-only: New. 2004-02-20 DJ Delorie <dj@redhat.com> * config/sh/sh.md ("movua"): Change constraint from "m" to "Sua". * config/sh/sh.h (EXTRA_CONSTRAINT_S): Add "Sua" support. 2003-08-22 Eric Christopher <echristo@redhat.com> * config/sh/sh4a.md: Update for chip errata. 2003-08-07 Eric Christopher <echristo@redhat.com> * config/sh/sh4a.md: New file. sh4a processor description. 2003-07-08 Alexandre Oliva <aoliva@redhat.com> * config/sh/sh.h (TARGET_SWITCHES): Added 4al. Adjust description of -m4a-nofpu. (SH_ASM_SPEC): Pass -dsp for -m4al, not -m4a-nofpu. * config/sh/t-sh (MULTILIB_MATCHES): Map -m4al to -m4a-nofpu. * doc/invoke.texi (SH Options): Document -m4al. 2003-07-03 Alexandre Oliva <aoliva@redhat.com> * config/sh/sh.c (expand_block_move): Remove commented-out code checked in by mistake. (sh_cannot_change_mode_class): Enable SUBREGs to be used to select single elements from SFmode vectors. * config/sh/sh.md (fsca): Use VEC_CONCAT to initialize the output register. (sinsf2, cossf2, sindf2, cosdf2): Don't emit CLOBBER. 2003-07-01 Alexandre Oliva <aoliva@redhat.com> * config/sh/sh.h (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): Remove variable declarations. * config/sh/sh.c (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): New functions. (sh_fsca_sf2int_rtx, sh_fsca_df2int_rtx, sh_fsca_int2sf_rtx): New static variables. * config/sh/sh-protos.h (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): Declare. * config/sh/sh.md: Adjust. * doc/invoke.texi (SH Options): Document new options. * config/sh/lib1funcs.asm (ic_invalidate): Remove SH4a forward compatibility from SH4 code. 
2003-06-27 Alexandre Oliva <aoliva@redhat.com> * config/sh/sh.c (expand_block_move): Don't emit POST_INC too early. (memory_movsrc_operand): Renamed to... (unaligned_load_operand): ... this. Simplified. * config/sh/sh.h (PREDICATE_CODES): Adjust. * config/sh/sh.md (movua, extv, extzv): Likewise. Change movua's input operand to SImode, and adjust the others. Introduce post-increment by peephole. * config/sh/sh.c (expand_block_move): Give the target address the same mode as the temp reg. * config/sh/sh.c (expand_block_move): Use a temp reg for unaligned copying. 2003-06-26 Alexandre Oliva <aoliva@redhat.com> Introduce support for SH4a. * config/sh/lib1funcs.asm (ic_invalidate): Use icbi if __SH4A__. Emit 4 4kb blocks and touch all of them otherwise. * config/sh/sh.c (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): New. (sh_init_builtins): Initialize them. (print_operand): Support `d'. (expand_block_move): Use movua if src is misaligned. (memory_movsrc_operand): New. * config/sh/sh.h (TARGET_CPU_CPP_BUILTINS): Define __SH4A__ and one of the SH4 macros. (SH4A_BIT, TARGET_SH4A_ARCH, TARGET_SH4A_FP, SELECT_SH4A_NOFPU, SELECT_SH4A_SINGLE_ONLY, SELECT_SH4A, SELECT_SH4A_SINGLE): New. (TARGET_NONE): Add SH4A_BIT. (TARGET_SWITCHES): Add 4a-single-only, 4a-single, 4a-nofpu and 4a. (SH_ASM_SPEC): Pass -dsp if -m4a-nofpu. (sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): Declare. (OVERRIDE_OPTIONS): Set cpu to CPU_SH4A when appropriate. (enum processor_type): Added PROCESSOR_SH4A. (PREDICATE_CODES): Add memory_movsrc_operand. * config/sh/sh.md: Removed unused variables. (attr cpu): Add sh4a. (attr type): Add movua, fsrra and fsca. (prefetch): New, for SH4. (ic_invalidate_line, ic_invalidate_line_sh4a): Use icbi. (toggle_sz): Set type to fp. (toggle_pr, rsqrtsf2, fsca, sinsf2, cossf2, sindf2, cosdf2): New. (movua, extv, extzv): New. * config/sh/t-sh: Add multilibs for 4a, 4a-nofpu, 4a-single and 4a-single-only. 
gcc/testsuite/ChangeLog: 2003-07-06 Alexandre Oliva <aoliva@redhat.com> * gcc.dg/sh4a-memmovua.c: Tweak regular expression. 2003-07-01 Alexandre Oliva <aoliva@redhat.com> * gcc.dg/sh4a-bitmovua.c: New. * gcc.dg/sh4a-cos.c: New. * gcc.dg/sh4a-cosf.c: New. * gcc.dg/sh4a-fprun.c: New. * gcc.dg/sh4a-fsrra.c: New. * gcc.dg/sh4a-memmovua.c: New. * gcc.dg/sh4a-sin.c: New. * gcc.dg/sh4a-sincos.c: New. * gcc.dg/sh4a-sincosf.c: New. * gcc.dg/sh4a-sinf.c: New. libstdc++-v3/ChangeLog: 2003-10-01 Eric Christopher <echristo@redhat.com> * config/cpu/sh/atomicity.h (__exchange_and_add): Remove 'm' constraint. 2003-07-09 Alexandre Oliva <aoliva@redhat.com> * config/cpu/sh/atomicity.h: New. Use movli and movco on SH4a. From-SVN: r85257
This commit is contained in:
parent
0871761b07
commit
312209c6a5
@ -1,3 +1,93 @@
|
||||
2004-07-28 Alexandre Oliva <aoliva@redhat.com>
|
||||
|
||||
Introduce sh4a support.
|
||||
* config.gcc: Handle sh4a multilibs and cpu selection.
|
||||
* config/sh/sh.h: Likewise. Handle sh4a command line flags.
|
||||
* config/sh/t-mlib-sh4a: New.
|
||||
* config/sh/t-mlib-sh4al: New.
|
||||
* config/sh/t-mlib-sh4a-nofpu: New.
|
||||
* config/sh/t-mlib-sh4a-single: New.
|
||||
* config/sh/t-mlib-sh4a-single-only: New.
|
||||
2004-02-20 DJ Delorie <dj@redhat.com>
|
||||
* config/sh/sh.md ("movua"): Change constraint from "m" to "Sua".
|
||||
* config/sh/sh.h (EXTRA_CONSTRAINT_S): Add "Sua" support.
|
||||
2003-08-22 Eric Christopher <echristo@redhat.com>
|
||||
* config/sh/sh4a.md: Update for chip errata.
|
||||
2003-08-07 Eric Christopher <echristo@redhat.com>
|
||||
* config/sh/sh4a.md: New file. sh4a processor description.
|
||||
2003-07-08 Alexandre Oliva <aoliva@redhat.com>
|
||||
* config/sh/sh.h (TARGET_SWITCHES): Added 4al. Adjust description
|
||||
of -m4a-nofpu.
|
||||
(SH_ASM_SPEC): Pass -dsp for -m4al, not -m4a-nofpu.
|
||||
* config/sh/t-sh (MULTILIB_MATCHES): Map -m4al to -m4a-nofpu.
|
||||
* doc/invoke.texi (SH Options): Document -m4al.
|
||||
2003-07-03 Alexandre Oliva <aoliva@redhat.com>
|
||||
* config/sh/sh.c (expand_block_move): Remove commented-out code
|
||||
checked in by mistake.
|
||||
(sh_cannot_change_mode_class): Enable SUBREGs to be used to select
|
||||
single elements from SFmode vectors.
|
||||
* config/sh/sh.md (fsca): Use VEC_CONCAT to initialize the output
|
||||
register.
|
||||
(sinsf2, cossf2, sindf2, cosdf2): Don't emit CLOBBER.
|
||||
2003-07-01 Alexandre Oliva <aoliva@redhat.com>
|
||||
* config/sh/sh.h (sh_fsca_sf2int, sh_fsca_df2int,
|
||||
sh_fsca_int2sf): Remove variable declarations.
|
||||
* config/sh/sh.c (sh_fsca_sf2int, sh_fsca_df2int,
|
||||
sh_fsca_int2sf): New functions.
|
||||
(sh_fsca_sf2int_rtx, sh_fsca_df2int_rtx,
|
||||
sh_fsca_int2sf_rtx): New static variables.
|
||||
* config/sh/sh-protos.h (sh_fsca_sf2int, sh_fsca_df2int,
|
||||
sh_fsca_int2sf): Declare.
|
||||
* config/sh/sh.md: Adjust.
|
||||
* doc/invoke.texi (SH Options): Document new options.
|
||||
* config/sh/lib1funcs.asm (ic_invalidate): Remove SH4a forward
|
||||
compatibility from SH4 code.
|
||||
2003-06-27 Alexandre Oliva <aoliva@redhat.com>
|
||||
* config/sh/sh.c (expand_block_move): Don't emit POST_INC too
|
||||
early.
|
||||
(memory_movsrc_operand): Renamed to...
|
||||
(unaligned_load_operand): ... this. Simplified.
|
||||
* config/sh/sh.h (PREDICATE_CODES): Adjust.
|
||||
* config/sh/sh.md (movua, extv, extzv): Likewise. Change movua's
|
||||
input operand to SImode, and adjust the others. Introduce
|
||||
post-increment by peephole.
|
||||
* config/sh/sh.c (expand_block_move): Give the target address the
|
||||
same mode as the temp reg.
|
||||
* config/sh/sh.c (expand_block_move): Use a temp reg for unaligned
|
||||
copying.
|
||||
2003-06-26 Alexandre Oliva <aoliva@redhat.com>
|
||||
Introduce support for SH4a.
|
||||
* config/sh/lib1funcs.asm (ic_invalidate): Use icbi if
|
||||
__SH4A__. Emit 4 4kb blocks and touch all of them otherwise.
|
||||
* config/sh/sh.c (sh_fsca_sf2int, sh_fsca_df2int,
|
||||
sh_fsca_int2sf): New.
|
||||
(sh_init_builtins): Initialize them.
|
||||
(print_operand): Support `d'.
|
||||
(expand_block_move): Use movua if src is misaligned.
|
||||
(memory_movsrc_operand): New.
|
||||
* config/sh/sh.h (TARGET_CPU_CPP_BUILTINS): Define __SH4A__
|
||||
and one of the SH4 macros.
|
||||
(SH4A_BIT, TARGET_SH4A_ARCH, TARGET_SH4A_FP,
|
||||
SELECT_SH4A_NOFPU, SELECT_SH4A_SINGLE_ONLY, SELECT_SH4A,
|
||||
SELECT_SH4A_SINGLE): New.
|
||||
(TARGET_NONE): Add SH4A_BIT.
|
||||
(TARGET_SWITCHES): Add 4a-single-only, 4a-single, 4a-nofpu and 4a.
|
||||
(SH_ASM_SPEC): Pass -dsp if -m4a-nofpu.
|
||||
(sh_fsca_sf2int, sh_fsca_df2int, sh_fsca_int2sf): Declare.
|
||||
(OVERRIDE_OPTIONS): Set cpu to CPU_SH4A when appropriate.
|
||||
(enum processor_type): Added PROCESSOR_SH4A.
|
||||
(PREDICATE_CODES): Add memory_movsrc_operand.
|
||||
* config/sh/sh.md: Removed unused variables.
|
||||
(attr cpu): Add sh4a.
|
||||
(attr type): Add movua, fsrra and fsca.
|
||||
(prefetch): New, for SH4.
|
||||
(ic_invalidate_line, ic_invalidate_line_sh4a): Use icbi.
|
||||
(toggle_sz): Set type to fp.
|
||||
(toggle_pr, rsqrtsf2, fsca, sinsf2, cossf2, sindf2, cosdf2): New.
|
||||
(movua, extv, extzv): New.
|
||||
* config/sh/t-sh: Add multilibs for 4a, 4a-nofpu, 4a-single
|
||||
and 4a-single-only.
|
||||
|
||||
2004-07-28 Diego Novillo <dnovillo@redhat.com>
|
||||
|
||||
* tree-optimize.c (init_tree_optimization_passes): Schedule
|
||||
|
@ -1746,6 +1746,11 @@ sh-*-symbianelf* | sh[12346l]*-*-symbianelf* | \
|
||||
case `echo ${target} | sed 's/e[lb]-/-/'` in
|
||||
sh64*-*-netbsd*) sh_cpu_target=sh5-64media ;;
|
||||
sh64* | sh5*-*-netbsd*) sh_cpu_target=sh5-32media ;;
|
||||
sh4a_single_only*) sh_cpu_target=sh4a-single-only ;;
|
||||
sh4a_single*) sh_cpu_target=sh4a-single ;;
|
||||
sh4a_nofpu*) sh_cpu_target=sh4a-nofpu ;;
|
||||
sh4al) sh_cpu_target=sh4al ;;
|
||||
sh4a*) sh_cpu_target=sh4a ;;
|
||||
sh4_single_only*) sh_cpu_target=sh4-single-only ;;
|
||||
sh4_single*) sh_cpu_target=sh4-single ;;
|
||||
sh4_nofpu*) sh_cpu_target=sh4-nofpu ;;
|
||||
@ -1760,6 +1765,7 @@ sh-*-symbianelf* | sh[12346l]*-*-symbianelf* | \
|
||||
case $sh_cpu_default in
|
||||
sh5-64media-nofpu | sh5-64media | \
|
||||
sh5-32media-nofpu | sh5-32media | sh5-compact-nofpu | sh5-compact | \
|
||||
sh4a-single-only | sh4a-single | sh4a-nofpu | sh4a | sh4al | \
|
||||
sh4-single-only | sh4-single | sh4-nofpu | sh4 | \
|
||||
sh3e | sh3 | sh2e | sh2 | sh1) ;;
|
||||
"") sh_cpu_default=${sh_cpu_target} ;;
|
||||
@ -1783,6 +1789,7 @@ sh-*-symbianelf* | sh[12346l]*-*-symbianelf* | \
|
||||
case ${sh_multilib} in
|
||||
sh1 | sh2 | sh2e | sh3 | sh3e | \
|
||||
sh4 | sh4-single | sh4-single-only | sh4-nofpu | \
|
||||
sh4a | sh4a-single | sh4a-single-only | sh4a-nofpu | sh4al | \
|
||||
sh5-64media | sh5-64media-nofpu | \
|
||||
sh5-32media | sh5-32media-nofpu | \
|
||||
sh5-compact | sh5-compact-nofpu)
|
||||
@ -2476,9 +2483,12 @@ fi
|
||||
"" | m1 | m2 | m2e | m3 | m3e | m4 | m4-single | m4-single-only | m4-nofpu )
|
||||
# OK
|
||||
;;
|
||||
m4a | m4a-single | m4a-single-only | m4a-nofpu | m4al)
|
||||
;;
|
||||
*)
|
||||
echo "Unknown CPU used in --with-cpu=$with_cpu, known values:" 1>&2
|
||||
echo "m1 m2 m2e m3 m3e m4 m4-single m4-single-only m4-nofpu" 1>&2
|
||||
echo "m4a m4a-single m4a-single-only m4a-nofpu m4al" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
@ -2036,7 +2036,34 @@ GLOBAL(ic_invalidate):
|
||||
|
||||
ENDFUNC(GLOBAL(ic_invalidate))
|
||||
ENDFUNC(GLOBAL(init_trampoline))
|
||||
#elif defined(__SH4A__)
|
||||
.global GLOBAL(ic_invalidate)
|
||||
FUNC(GLOBAL(ic_invalidate))
|
||||
GLOBAL(ic_invalidate):
|
||||
ocbwb @r4
|
||||
synco
|
||||
rts
|
||||
icbi @r4
|
||||
ENDFUNC(GLOBAL(ic_invalidate))
|
||||
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
|
||||
/* This assumes a direct-mapped cache, which is the case for
|
||||
the first SH4, but not for the second version of SH4, that
|
||||
uses a 2-way set-associative cache, nor SH4a, that is 4-way.
|
||||
SH4a fortunately offers an instruction to invalidate the
|
||||
instruction cache, and we use it above, but SH4 doesn't.
|
||||
However, since the libraries don't contain any nested
|
||||
functions (the only case in which GCC would emit this pattern)
|
||||
and we actually emit the ic_invalidate_line_i pattern for
|
||||
cache invalidation on all SH4 multilibs (even 4-nofpu, that
|
||||
isn't even covered here), and pre-SH4 cores don't have
|
||||
caches, it seems like this code is pointless, unless it's
|
||||
meant for backward binary compatibility or for userland-only
|
||||
cache invalidation for say sh4-*-linux-gnu. Such a feature
|
||||
should probably be moved into a system call, such that the
|
||||
kernel could do whatever it takes to invalidate a cache line
|
||||
on the core it's actually running on. I.e., this hideous :-)
|
||||
piece of code should go away at some point. */
|
||||
|
||||
.global GLOBAL(ic_invalidate)
|
||||
FUNC(GLOBAL(ic_invalidate))
|
||||
GLOBAL(ic_invalidate):
|
||||
|
@ -25,6 +25,9 @@ Boston, MA 02111-1307, USA. */
|
||||
#define GCC_SH_PROTOS_H
|
||||
|
||||
#ifdef RTX_CODE
|
||||
extern rtx sh_fsca_sf2int (void);
|
||||
extern rtx sh_fsca_df2int (void);
|
||||
extern rtx sh_fsca_int2sf (void);
|
||||
extern struct rtx_def *prepare_scc_operands (enum rtx_code);
|
||||
|
||||
/* Declare functions defined in sh.c and used in templates. */
|
||||
|
@ -537,6 +537,7 @@ print_operand_address (FILE *stream, rtx x)
|
||||
'T' print the next word of a dp value - same as 'R' in big endian mode.
|
||||
'M' print an `x' if `m' will print `base,index'.
|
||||
'N' print 'r63' if the operand is (const_int 0).
|
||||
'd' print a V2SF reg as dN instead of fpN.
|
||||
'm' print a pair `base,offset' or `base,index', for LD and ST.
|
||||
'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
|
||||
'o' output an operator. */
|
||||
@ -651,6 +652,13 @@ print_operand (FILE *stream, rtx x, int code)
|
||||
}
|
||||
break;
|
||||
|
||||
case 'd':
|
||||
if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
|
||||
abort ();
|
||||
|
||||
fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
|
||||
break;
|
||||
|
||||
case 'N':
|
||||
if (x == CONST0_RTX (GET_MODE (x)))
|
||||
{
|
||||
@ -772,9 +780,48 @@ expand_block_move (rtx *operands)
|
||||
int constp = (GET_CODE (operands[2]) == CONST_INT);
|
||||
int bytes = (constp ? INTVAL (operands[2]) : 0);
|
||||
|
||||
if (! constp)
|
||||
return 0;
|
||||
|
||||
/* If we could use mov.l to move words and dest is word-aligned, we
|
||||
can use movua.l for loads and still generate a relatively short
|
||||
and efficient sequence. */
|
||||
if (TARGET_SH4A_ARCH && align < 4
|
||||
&& MEM_ALIGN (operands[0]) >= 32
|
||||
&& can_move_by_pieces (bytes, 32))
|
||||
{
|
||||
rtx dest = copy_rtx (operands[0]);
|
||||
rtx src = copy_rtx (operands[1]);
|
||||
/* We could use different pseudos for each copied word, but
|
||||
since movua can only load into r0, it's kind of
|
||||
pointless. */
|
||||
rtx temp = gen_reg_rtx (SImode);
|
||||
rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
|
||||
int copied = 0;
|
||||
|
||||
while (copied + 4 <= bytes)
|
||||
{
|
||||
rtx to = adjust_address (dest, SImode, copied);
|
||||
rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
|
||||
|
||||
emit_insn (gen_movua (temp, from));
|
||||
emit_move_insn (src_addr, plus_constant (src_addr, 4));
|
||||
emit_move_insn (to, temp);
|
||||
copied += 4;
|
||||
}
|
||||
|
||||
if (copied < bytes)
|
||||
move_by_pieces (adjust_address (dest, BLKmode, copied),
|
||||
adjust_automodify_address (src, BLKmode,
|
||||
src_addr, copied),
|
||||
bytes - copied, align, 0);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* If it isn't a constant number of bytes, or if it doesn't have 4 byte
|
||||
alignment, or if it isn't a multiple of 4 bytes, then fail. */
|
||||
if (! constp || align < 4 || (bytes % 4 != 0))
|
||||
if (align < 4 || (bytes % 4 != 0))
|
||||
return 0;
|
||||
|
||||
if (TARGET_HARD_SH4)
|
||||
@ -9397,6 +9444,11 @@ bool
|
||||
sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
|
||||
enum reg_class class)
|
||||
{
|
||||
/* We want to enable the use of SUBREGs as a means to
|
||||
VEC_SELECT a single element of a vector. */
|
||||
if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
|
||||
return (reg_classes_intersect_p (GENERAL_REGS, class));
|
||||
|
||||
if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
|
||||
{
|
||||
if (TARGET_LITTLE_ENDIAN)
|
||||
@ -9821,4 +9873,87 @@ check_use_sfunc_addr (rtx insn, rtx reg)
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Returns 1 if OP is a MEM of mode MODE whose address is either a plain register or a POST_INC of a register; returns 0 otherwise. */
|
||||
|
||||
int
|
||||
unaligned_load_operand (rtx op, enum machine_mode mode)
|
||||
{
|
||||
rtx inside;
|
||||
|
||||
if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
|
||||
return 0;
|
||||
|
||||
inside = XEXP (op, 0);
|
||||
|
||||
if (GET_CODE (inside) == POST_INC)
|
||||
inside = XEXP (inside, 0);
|
||||
|
||||
if (GET_CODE (inside) == REG)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This function returns a constant rtx that represents 2**15 / pi in
|
||||
SFmode. It's used to scale SFmode angles, in radians, to a
|
||||
fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
|
||||
maps to 0x10000). */
|
||||
|
||||
static GTY(()) rtx sh_fsca_sf2int_rtx;
|
||||
|
||||
rtx
|
||||
sh_fsca_sf2int (void)
|
||||
{
|
||||
if (! sh_fsca_sf2int_rtx)
|
||||
{
|
||||
REAL_VALUE_TYPE rv;
|
||||
|
||||
real_from_string (&rv, "10430.378350470453");
|
||||
sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
|
||||
}
|
||||
|
||||
return sh_fsca_sf2int_rtx;
|
||||
}
|
||||
|
||||
/* This function returns a constant rtx that represents 2**15 / pi in
|
||||
DFmode. It's used to scale DFmode angles, in radians, to a
|
||||
fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
|
||||
maps to 0x10000). */
|
||||
|
||||
static GTY(()) rtx sh_fsca_df2int_rtx;
|
||||
|
||||
rtx
|
||||
sh_fsca_df2int (void)
|
||||
{
|
||||
if (! sh_fsca_df2int_rtx)
|
||||
{
|
||||
REAL_VALUE_TYPE rv;
|
||||
|
||||
real_from_string (&rv, "10430.378350470453");
|
||||
sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
|
||||
}
|
||||
|
||||
return sh_fsca_df2int_rtx;
|
||||
}
|
||||
|
||||
/* This function returns a constant rtx that represents pi / 2**15 in
|
||||
SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
|
||||
of a full circle back to an SFmode value (i.e., 0x10000 maps to
|
||||
2*pi). */
|
||||
|
||||
static GTY(()) rtx sh_fsca_int2sf_rtx;
|
||||
|
||||
rtx
|
||||
sh_fsca_int2sf (void)
|
||||
{
|
||||
if (! sh_fsca_int2sf_rtx)
|
||||
{
|
||||
REAL_VALUE_TYPE rv;
|
||||
|
||||
real_from_string (&rv, "9.587379924285257e-5");
|
||||
sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
|
||||
}
|
||||
|
||||
return sh_fsca_int2sf_rtx;
|
||||
}
|
||||
#include "gt-sh.h"
|
||||
|
@ -60,6 +60,13 @@ do { \
|
||||
case PROCESSOR_SH4: \
|
||||
builtin_define (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__"); \
|
||||
break; \
|
||||
case PROCESSOR_SH4A: \
|
||||
builtin_define ("__SH4A__"); \
|
||||
builtin_define (TARGET_SH4 \
|
||||
? (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__") \
|
||||
: TARGET_FPU_ANY ? "__SH4_SINGLE_ONLY__" \
|
||||
: "__SH4_NOFPU__"); \
|
||||
break; \
|
||||
case PROCESSOR_SH5: \
|
||||
{ \
|
||||
builtin_define_with_value ("__SH5__", \
|
||||
@ -138,6 +145,7 @@ extern int target_flags;
|
||||
#define HARD_SH4_BIT (1<<5)
|
||||
#define FPU_SINGLE_BIT (1<<7)
|
||||
#define SH4_BIT (1<<12)
|
||||
#define SH4A_BIT (1<<3)
|
||||
#define FMOVD_BIT (1<<4)
|
||||
#define SH5_BIT (1<<0)
|
||||
#define SPACE_BIT (1<<13)
|
||||
@ -200,6 +208,14 @@ extern int target_flags;
|
||||
/* Nonzero if we should generate code using type 4 insns. */
|
||||
#define TARGET_SH4 ((target_flags & SH4_BIT) && (target_flags & SH1_BIT))
|
||||
|
||||
/* Nonzero if we're generating code for the common subset of
|
||||
instructions present on both SH4a and SH4al-dsp. */
|
||||
#define TARGET_SH4A_ARCH (target_flags & SH4A_BIT)
|
||||
|
||||
/* Nonzero if we're generating code for SH4a, unless the use of the
|
||||
FPU is disabled (which makes it compatible with SH4al-dsp). */
|
||||
#define TARGET_SH4A_FP (TARGET_SH4A_ARCH && TARGET_FPU_ANY)
|
||||
|
||||
/* Nonzero if we should generate code for a SH5 CPU (either ISA). */
|
||||
#define TARGET_SH5 (target_flags & SH5_BIT)
|
||||
|
||||
@ -285,6 +301,10 @@ extern int target_flags;
|
||||
#define SELECT_SH4_SINGLE_ONLY (HARD_SH4_BIT | SELECT_SH3E)
|
||||
#define SELECT_SH4 (SH4_BIT | SH_E_BIT | HARD_SH4_BIT | SELECT_SH3)
|
||||
#define SELECT_SH4_SINGLE (FPU_SINGLE_BIT | SELECT_SH4)
|
||||
#define SELECT_SH4A_NOFPU (SH4A_BIT | SELECT_SH4_NOFPU)
|
||||
#define SELECT_SH4A_SINGLE_ONLY (SH4A_BIT | SELECT_SH4_SINGLE_ONLY)
|
||||
#define SELECT_SH4A (SH4A_BIT | SELECT_SH4)
|
||||
#define SELECT_SH4A_SINGLE (SH4A_BIT | SELECT_SH4_SINGLE)
|
||||
#define SELECT_SH5_64MEDIA (SH5_BIT | SH4_BIT)
|
||||
#define SELECT_SH5_64MEDIA_NOFPU (SH5_BIT)
|
||||
#define SELECT_SH5_32MEDIA (SH5_BIT | SH4_BIT | SH_E_BIT)
|
||||
@ -302,6 +322,12 @@ extern int target_flags;
|
||||
#ifndef SUPPORT_SH4_NOFPU
|
||||
#define TARGET_SWITCH_SH4_NOFPU
|
||||
#endif
|
||||
#ifndef SUPPORT_SH4A_NOFPU
|
||||
#define TARGET_SWITCH_SH4A_NOFPU
|
||||
#endif
|
||||
#ifndef SUPPORT_SH4AL
|
||||
#define TARGET_SWITCH_SH4AL
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
@ -313,15 +339,24 @@ extern int target_flags;
|
||||
#ifndef SUPPORT_SH4_SINGLE_ONLY
|
||||
#define TARGET_SWITCH_SH4_SINGLE_ONLY
|
||||
#endif
|
||||
#ifndef SUPPORT_SH4A_SINGLE_ONLY
|
||||
#define TARGET_SWITCH_SH4A_SINGLE_ONLY
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_SH4
|
||||
#define TARGET_SWITCH_SH4
|
||||
#ifndef SUPPORT_SH4A
|
||||
#define TARGET_SWITCH_SH4A
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_SH4_SINGLE
|
||||
#define TARGET_SWITCH_SH4_SINGLE
|
||||
#ifndef SUPPORT_SH4A_SINGLE
|
||||
#define TARGET_SWITCH_SH4A_SINGLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_SH5_64MEDIA
|
||||
@ -342,7 +377,7 @@ extern int target_flags;
|
||||
|
||||
/* Reset all target-selection flags. */
|
||||
#define TARGET_NONE -(SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | SH4_BIT \
|
||||
| HARD_SH4_BIT | FPU_SINGLE_BIT | SH5_BIT)
|
||||
| SH4A_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT | SH5_BIT)
|
||||
|
||||
#ifndef TARGET_SWITCH_SH1
|
||||
#define TARGET_SWITCH_SH1 \
|
||||
@ -389,6 +424,31 @@ extern int target_flags;
|
||||
{"4", TARGET_NONE, "" }, \
|
||||
{"4", SELECT_SH4, "Generate SH4 code" },
|
||||
#endif
|
||||
#ifndef TARGET_SWITCH_SH4A
|
||||
#define TARGET_SWITCH_SH4A \
|
||||
{"4a", TARGET_NONE, "" }, \
|
||||
{"4a", SELECT_SH4A, "Generate SH4a code" },
|
||||
#endif
|
||||
#ifndef TARGET_SWITCH_SH4A_SINGLE_ONLY
|
||||
#define TARGET_SWITCH_SH4A_SINGLE_ONLY \
|
||||
{"4a-single-only", TARGET_NONE, "" }, \
|
||||
{"4a-single-only", SELECT_SH4A_SINGLE_ONLY, "Generate only single-precision SH4a code" },
|
||||
#endif
|
||||
#ifndef TARGET_SWITCH_SH4A_SINGLE
|
||||
#define TARGET_SWITCH_SH4A_SINGLE \
|
||||
{"4a-single", TARGET_NONE, "" },\
|
||||
{"4a-single", SELECT_SH4A_SINGLE, "Generate default single-precision SH4a code" },
|
||||
#endif
|
||||
#ifndef TARGET_SWITCH_SH4A_NOFPU
|
||||
#define TARGET_SWITCH_SH4A_NOFPU \
|
||||
{"4a-nofpu", TARGET_NONE, "" },\
|
||||
{"4a-nofpu", SELECT_SH4A_NOFPU, "Generate SH4a FPU-less code" },
|
||||
#endif
|
||||
#ifndef TARGET_SWITCH_SH4AL
|
||||
#define TARGET_SWITCH_SH4AL \
|
||||
{"4al", TARGET_NONE, "" },\
|
||||
{"4al", SELECT_SH4A_NOFPU, "Generate SH4al-dsp code" },
|
||||
#endif
|
||||
#ifndef TARGET_SWITCH_SH5_64MEDIA
|
||||
#define TARGET_SWITCH_SH5_64MEDIA \
|
||||
{"5-64media", TARGET_NONE, "" }, \
|
||||
@ -424,6 +484,11 @@ extern int target_flags;
|
||||
TARGET_SWITCH_SH4_SINGLE \
|
||||
TARGET_SWITCH_SH4_NOFPU \
|
||||
TARGET_SWITCH_SH4 \
|
||||
TARGET_SWITCH_SH4A_SINGLE_ONLY \
|
||||
TARGET_SWITCH_SH4A_SINGLE \
|
||||
TARGET_SWITCH_SH4A_NOFPU \
|
||||
TARGET_SWITCH_SH4A \
|
||||
TARGET_SWITCH_SH4AL \
|
||||
TARGET_SWITCH_SH5_64MEDIA \
|
||||
TARGET_SWITCH_SH5_64MEDIA_NOFPU \
|
||||
TARGET_SWITCHES_SH5_32MEDIA \
|
||||
@ -497,7 +562,7 @@ extern int target_flags;
|
||||
|
||||
#define SH_ASM_SPEC \
|
||||
"%(subtarget_asm_endian_spec) %{mrelax:-relax %(subtarget_asm_relax_spec)}\
|
||||
%(subtarget_asm_isa_spec)"
|
||||
%(subtarget_asm_isa_spec) %{m4al:-dsp}"
|
||||
|
||||
#define ASM_SPEC SH_ASM_SPEC
|
||||
|
||||
@ -584,6 +649,11 @@ do { \
|
||||
assembler_dialect = 1; \
|
||||
sh_cpu = CPU_SH4; \
|
||||
} \
|
||||
if (TARGET_SH4A_ARCH) \
|
||||
{ \
|
||||
assembler_dialect = 1; \
|
||||
sh_cpu = CPU_SH4A; \
|
||||
} \
|
||||
if (TARGET_SH5) \
|
||||
{ \
|
||||
sh_cpu = CPU_SH5; \
|
||||
@ -2441,8 +2511,12 @@ struct sh_args {
|
||||
#define EXTRA_CONSTRAINT_Sr0(OP) \
|
||||
(memory_operand((OP), GET_MODE (OP)) \
|
||||
&& ! refers_to_regno_p (R0_REG, R0_REG + 1, OP, (rtx *)0))
|
||||
#define EXTRA_CONSTRAINT_Sua(OP) \
|
||||
(memory_operand((OP), GET_MODE (OP)) \
|
||||
&& GET_CODE (XEXP (OP, 0)) != PLUS)
|
||||
#define EXTRA_CONSTRAINT_S(OP, STR) \
|
||||
((STR)[1] == 'r' && (STR)[2] == '0' ? EXTRA_CONSTRAINT_Sr0 (OP) \
|
||||
: (STR)[1] == 'u' && (STR)[2] == 'a' ? EXTRA_CONSTRAINT_Sua (OP) \
|
||||
: 0)
|
||||
|
||||
#define EXTRA_CONSTRAINT_STR(OP, C, STR) \
|
||||
@ -3175,6 +3249,7 @@ enum processor_type {
|
||||
PROCESSOR_SH3,
|
||||
PROCESSOR_SH3E,
|
||||
PROCESSOR_SH4,
|
||||
PROCESSOR_SH4A,
|
||||
PROCESSOR_SH5
|
||||
};
|
||||
|
||||
@ -3245,6 +3320,7 @@ extern int rtx_equal_function_value_matters;
|
||||
{"general_extend_operand", {SUBREG, REG, MEM, TRUNCATE}}, \
|
||||
{"general_movsrc_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE, MEM}}, \
|
||||
{"general_movdst_operand", {SUBREG, REG, MEM}}, \
|
||||
{"unaligned_load_operand", {MEM}}, \
|
||||
{"greater_comparison_operator", {GT,GE,GTU,GEU}}, \
|
||||
{"int_gpr_dest", {SUBREG, REG}}, \
|
||||
{"inqhi_operand", {TRUNCATE}}, \
|
||||
|
@ -161,7 +161,7 @@
|
||||
;; Target CPU.
|
||||
|
||||
(define_attr "cpu"
|
||||
"sh1,sh2,sh2e,sh3,sh3e,sh4,sh5"
|
||||
"sh1,sh2,sh2e,sh3,sh3e,sh4,sh4a,sh5"
|
||||
(const (symbol_ref "sh_cpu_attr")))
|
||||
|
||||
(define_attr "endian" "big,little"
|
||||
@ -218,6 +218,9 @@
|
||||
;; ftrc_s fix_truncsfsi2_i4
|
||||
;; dfdiv double precision floating point divide (or square root)
|
||||
;; cwb ic_invalidate_line_i
|
||||
;; movua SH4a unaligned load
|
||||
;; fsrra square root reciprocal approximate
|
||||
;; fsca sine and cosine approximate
|
||||
;; tls_load load TLS related address
|
||||
;; arith_media SHmedia arithmetic, logical, and shift instructions
|
||||
;; cbranch_media SHmedia conditional branch instructions
|
||||
@ -249,7 +252,7 @@
|
||||
;; nil no-op move, will be deleted.
|
||||
|
||||
(define_attr "type"
|
||||
"mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fdiv,ftrc_s,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,mem_fpscr,gp_fpscr,cwb,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other"
|
||||
"mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fdiv,ftrc_s,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other"
|
||||
(const_string "other"))
|
||||
|
||||
;; We define a new attribute namely "insn_class". We use
|
||||
@ -3488,6 +3491,11 @@
|
||||
emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1]));
|
||||
DONE;
|
||||
}
|
||||
else if (TARGET_SH4A_ARCH)
|
||||
{
|
||||
emit_insn (gen_ic_invalidate_line_sh4a (operands[0]));
|
||||
DONE;
|
||||
}
|
||||
operands[0] = force_reg (Pmode, operands[0]);
|
||||
operands[1] = force_reg (Pmode, GEN_INT (trunc_int_for_mode (0xf0000008,
|
||||
Pmode)));
|
||||
@ -3508,6 +3516,14 @@
|
||||
[(set_attr "length" "8")
|
||||
(set_attr "type" "cwb")])
|
||||
|
||||
(define_insn "ic_invalidate_line_sh4a"
|
||||
[(unspec_volatile [(match_operand:SI 0 "register_operand" "r")]
|
||||
UNSPEC_ICACHE)]
|
||||
"TARGET_SH4A_ARCH"
|
||||
"ocbwb\\t@%0\;synco\;icbi\\t@%0"
|
||||
[(set_attr "length" "16")
|
||||
(set_attr "type" "cwb")])
|
||||
|
||||
;; ??? could make arg 0 an offsettable memory operand to allow to save
|
||||
;; an add in the code that calculates the address.
|
||||
(define_insn "ic_invalidate_line_media"
|
||||
@ -8151,7 +8167,19 @@ mov.l\\t1f,r0\\n\\
|
||||
(xor:PSI (reg:PSI FPSCR_REG) (const_int 1048576)))]
|
||||
"TARGET_SH4"
|
||||
"fschg"
|
||||
[(set_attr "fp_set" "unknown")])
|
||||
[(set_attr "type" "fp") (set_attr "fp_set" "unknown")])
|
||||
|
||||
;; There's no way we can use it today, since optimize mode switching
|
||||
;; doesn't enable us to know from which mode we're switching to the
|
||||
;; mode it requests, to tell whether we can use a relative mode switch
|
||||
;; (like toggle_pr) or an absolute switch (like loading fpscr from
|
||||
;; memory).
|
||||
(define_insn "toggle_pr"
|
||||
[(set (reg:PSI FPSCR_REG)
|
||||
(xor:PSI (reg:PSI FPSCR_REG) (const_int 524288)))]
|
||||
"TARGET_SH4A_FP && ! TARGET_FPU_SINGLE"
|
||||
"fpchg"
|
||||
[(set_attr "type" "fp")])
|
||||
|
||||
(define_expand "addsf3"
|
||||
[(set (match_operand:SF 0 "arith_reg_operand" "")
|
||||
@ -8650,6 +8678,117 @@ mov.l\\t1f,r0\\n\\
|
||||
[(set_attr "type" "fdiv")
|
||||
(set_attr "fp_mode" "single")])
|
||||
|
||||
;; Reciprocal square root approximation.  Matches CONST / sqrt (x) where the
;; constant (operand 1) is exactly CONST1_RTX (SFmode), i.e. 1.0f / sqrt (x).
;; Operand 2 is tied to the output register ("0"), so fsrra works in place.
;; Only enabled under flag_unsafe_math_optimizations because the instruction
;; is an approximation.  The template separates mnemonic and operand with a
;; tab, which the sh4a-fsrra.c scan-assembler pattern relies on.
(define_insn "rsqrtsf2"
  [(set (match_operand:SF 0 "register_operand" "=f")
        (div:SF (match_operand:SF 1 "immediate_operand" "i")
                (sqrt:SF (match_operand:SF 2 "register_operand" "0"))))
   (use (match_operand:PSI 3 "fpscr_operand" "c"))]
  "TARGET_SH4A_FP && flag_unsafe_math_optimizations
   && operands[1] == CONST1_RTX (SFmode)"
  "fsrra	%0"
  [(set_attr "type" "fsrra")
   (set_attr "fp_mode" "single")])
|
||||
|
||||
;; Combined sine/cosine approximation.  The V2SF result is a vec_concat of
;; two unspecs over the same input expression (float (operand 1) * operand 2):
;; element 0 is UNSPEC_FSINA and element 1 is UNSPEC_FCOSA.  Operand 1 must
;; be in FPUL ("y") and operand 2 must be the very rtx returned by
;; sh_fsca_int2sf (), which keeps the pattern from matching arbitrary scale
;; factors.  Only enabled under flag_unsafe_math_optimizations.  The
;; template separates mnemonic and operands with a tab, which the sh4a-*.c
;; scan-assembler patterns rely on.
(define_insn "fsca"
  [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f")
        (vec_concat:V2SF
         (unspec:SF [(mult:SF
                      (float:SF (match_operand:SI 1 "fpul_operand" "y"))
                      (match_operand:SF 2 "immediate_operand" "i"))
                    ] UNSPEC_FSINA)
         (unspec:SF [(mult:SF (float:SF (match_dup 1)) (match_dup 2))
                    ] UNSPEC_FCOSA)))
   (use (match_operand:PSI 3 "fpscr_operand" "c"))]
  "TARGET_SH4A_FP && flag_unsafe_math_optimizations
   && operands[2] == sh_fsca_int2sf ()"
  "fsca	fpul,%d0"
  [(set_attr "type" "fsca")
   (set_attr "fp_mode" "single")])
|
||||
|
||||
;; Single-precision sine via fsca (unsafe-math only).  The expander always
;; emits its own sequence and finishes with DONE, so the RTL template above
;; the condition only documents the operands.
(define_expand "sinsf2"
  [(set (match_operand:SF 0 "nonimmediate_operand" "")
        (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")]
                   UNSPEC_FSINA))]
  "TARGET_SH4A_FP && flag_unsafe_math_optimizations"
  "
{
  rtx scaled = gen_reg_rtx (SFmode);
  rtx truncated = gen_reg_rtx (SImode);
  rtx fsca = gen_reg_rtx (V2SFmode);
  rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ());

  /* Multiply the argument by the sh_fsca_sf2int () factor, truncate the
     product to an integer and hand that integer to fsca.  */
  emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg));
  emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled));
  emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
                          get_fpscr_rtx ()));
  /* Byte offset 0 of the V2SF pair is the UNSPEC_FSINA (sine) element.  */
  emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 0));
  DONE;
}")
|
||||
|
||||
;; Single-precision cosine via fsca (unsafe-math only).  Identical to the
;; sine expansion except that the second element of the fsca result is used.
(define_expand "cossf2"
  [(set (match_operand:SF 0 "nonimmediate_operand" "")
        (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")]
                   UNSPEC_FCOSA))]
  "TARGET_SH4A_FP && flag_unsafe_math_optimizations"
  "
{
  rtx scaled = gen_reg_rtx (SFmode);
  rtx truncated = gen_reg_rtx (SImode);
  rtx fsca = gen_reg_rtx (V2SFmode);
  rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ());

  /* Multiply the argument by the sh_fsca_sf2int () factor, truncate the
     product to an integer and hand that integer to fsca.  */
  emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg));
  emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled));
  emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
                          get_fpscr_rtx ()));
  /* Byte offset 4 of the V2SF pair is the UNSPEC_FCOSA (cosine) element.  */
  emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 4));
  DONE;
}")
|
||||
|
||||
;; Double-precision sine via the single-precision fsca (unsafe-math only,
;; and not available when the FPU is restricted to single precision).  The
;; scaling and truncation are done in DFmode; the fsca result is SFmode and
;; is widened back to DFmode at the end.
(define_expand "sindf2"
  [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
        (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")]
                   UNSPEC_FSINA))]
  "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE && flag_unsafe_math_optimizations"
  "
{
  rtx scaled = gen_reg_rtx (DFmode);
  rtx truncated = gen_reg_rtx (SImode);
  rtx fsca = gen_reg_rtx (V2SFmode);
  rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ());
  rtx sfresult = gen_reg_rtx (SFmode);

  /* Multiply the argument by the sh_fsca_df2int () factor and truncate the
     product to the integer that fsca consumes.  */
  emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg));
  emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled));
  emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
                          get_fpscr_rtx ()));
  /* Byte offset 0 of the V2SF pair is the UNSPEC_FSINA (sine) element;
     extend it to the DFmode destination.  */
  emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 0));
  emit_df_insn (gen_extendsfdf2 (operands[0], sfresult));
  DONE;
}")
|
||||
|
||||
;; Double-precision cosine via the single-precision fsca (unsafe-math only,
;; and not available when the FPU is restricted to single precision).
;; Identical to the double-precision sine expansion except that the second
;; element of the fsca result is used.
(define_expand "cosdf2"
  [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
        (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")]
                   UNSPEC_FCOSA))]
  "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE && flag_unsafe_math_optimizations"
  "
{
  rtx scaled = gen_reg_rtx (DFmode);
  rtx truncated = gen_reg_rtx (SImode);
  rtx fsca = gen_reg_rtx (V2SFmode);
  rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ());
  rtx sfresult = gen_reg_rtx (SFmode);

  /* Multiply the argument by the sh_fsca_df2int () factor and truncate the
     product to the integer that fsca consumes.  */
  emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg));
  emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled));
  emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
                          get_fpscr_rtx ()));
  /* Byte offset 4 of the V2SF pair is the UNSPEC_FCOSA (cosine) element;
     extend it to the DFmode destination.  */
  emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 4));
  emit_df_insn (gen_extendsfdf2 (operands[0], sfresult));
  DONE;
}")
|
||||
|
||||
(define_expand "abssf2"
|
||||
[(set (match_operand:SF 0 "fp_arith_reg_operand" "")
|
||||
(abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))]
|
||||
@ -9188,6 +9327,71 @@ mov.l\\t1f,r0\\n\\
|
||||
|
||||
DONE;
|
||||
}")
|
||||
|
||||
;; Unaligned 32-bit load, expressed as a full-width (32 bits at position 0)
;; sign_extract of an SImode memory operand.  The destination is constrained
;; to "z"; the source accepts the "Sua" and post-increment (">") forms.
;; The template separates mnemonic and operands with a tab, which the
;; sh4a-*movua.c scan-assembler patterns rely on.
(define_insn "movua"
  [(set (match_operand:SI 0 "register_operand" "=z")
        (sign_extract:SI (match_operand:SI 1 "unaligned_load_operand" "Sua>")
                         (const_int 32) (const_int 0)))]
  "TARGET_SH4A_ARCH"
  "movua.l	%1,%0"
  [(set_attr "type" "movua")])
|
||||
|
||||
;; We shouldn't need this, but cse replaces increments with references
|
||||
;; to other regs before flow has a chance to create post_inc
|
||||
;; addressing modes, and only postreload's cse_move2add brings the
|
||||
;; increments back to a usable form.
|
||||
;; Fold "unaligned load through register 1" followed by "register 1 += 4"
;; into a single load with post-increment addressing.  The destination must
;; be a different register from the address register, otherwise the
;; increment would be applied to the loaded value.
(define_peephole2
  [(set (match_operand:SI 0 "register_operand" "")
        (sign_extract:SI (mem:SI (match_operand:SI 1 "register_operand" ""))
                         (const_int 32) (const_int 0)))
   (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))]
  "TARGET_SH4A_ARCH && REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_operand:SI 0 "register_operand" "")
        (sign_extract:SI (mem:SI (post_inc:SI
                                  (match_operand:SI 1 "register_operand" "")))
                         (const_int 32) (const_int 0)))]
  "")
|
||||
|
||||
;; Signed bit-field extraction.  The only case handled is a full 32-bit
;; field read from a MEM whose known alignment is below 32 bits on SH4A,
;; which is turned into a single movua; everything else FAILs so that the
;; generic code is used instead.
(define_expand "extv"
  [(set (match_operand:SI 0 "register_operand" "")
        (sign_extract:SI (match_operand:QI 1 "unaligned_load_operand" "")
                         (match_operand 2 "const_int_operand" "")
                         (match_operand 3 "const_int_operand" "")))]
  ""
{
  /* Operand 2 is the field width and operand 3 the bit position.  The
     accepted position is 0 when BITS_BIG_ENDIAN and BYTES_BIG_ENDIAN agree
     and -24 when they differ.  */
  if (TARGET_SH4A_ARCH
      && INTVAL (operands[2]) == 32
      && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
      && GET_CODE (operands[1]) == MEM && MEM_ALIGN (operands[1]) < 32)
    {
      /* Re-view the QImode reference as SImode at the same address.  */
      emit_insn (gen_movua (operands[0],
                            adjust_address (operands[1], SImode, 0)));
      DONE;
    }

  FAIL;
})
|
||||
|
||||
;; Unsigned bit-field extraction.  The only case handled is a full 32-bit
;; field read from a MEM whose known alignment is below 32 bits on SH4A,
;; which is turned into a single movua; everything else FAILs so that the
;; generic code is used instead.  The sign-extracting movua pattern can be
;; reused here because a 32-bit field fills the whole SImode destination.
(define_expand "extzv"
  [(set (match_operand:SI 0 "register_operand" "")
        (zero_extract:SI (match_operand:QI 1 "unaligned_load_operand" "")
                         (match_operand 2 "const_int_operand" "")
                         (match_operand 3 "const_int_operand" "")))]
  ""
{
  /* Operand 2 is the field width and operand 3 the bit position.  The
     accepted position is 0 when BITS_BIG_ENDIAN and BYTES_BIG_ENDIAN agree
     and -24 when they differ.  */
  if (TARGET_SH4A_ARCH
      && INTVAL (operands[2]) == 32
      && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
      && GET_CODE (operands[1]) == MEM && MEM_ALIGN (operands[1]) < 32)
    {
      /* Re-view the QImode reference as SImode at the same address.  */
      emit_insn (gen_movua (operands[0],
                            adjust_address (operands[1], SImode, 0)));
      DONE;
    }

  FAIL;
})
|
||||
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; Peepholes
|
||||
@ -10661,9 +10865,11 @@ mov.l\\t1f,r0\\n\\
|
||||
[(prefetch (match_operand:QI 0 "address_operand" "p")
|
||||
(match_operand:SI 1 "const_int_operand" "n")
|
||||
(match_operand:SI 2 "const_int_operand" "n"))]
|
||||
"TARGET_SHMEDIA"
|
||||
"TARGET_SHMEDIA || TARGET_HARD_SH4"
|
||||
"*
|
||||
{
|
||||
if (TARGET_HARD_SH4)
|
||||
return \"pref @%0\";
|
||||
operands[0] = gen_rtx_MEM (QImode, operands[0]);
|
||||
output_asm_insn (\"ld%M0.b %m0,r63\", operands);
|
||||
return \"\";
|
||||
|
235
gcc/config/sh/sh4a.md
Normal file
235
gcc/config/sh/sh4a.md
Normal file
@ -0,0 +1,235 @@
|
||||
;; Scheduling description for Renesas SH4a
|
||||
;; Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GNU CC.
|
||||
;;
|
||||
;; GNU CC is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;; any later version.
|
||||
;;
|
||||
;; GNU CC is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GNU CC; see the file COPYING. If not, write to
|
||||
;; the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
;; Boston, MA 02111-1307, USA.
|
||||
|
||||
;; The following description models the SH4A pipeline
|
||||
;; using the DFA based scheduler.
|
||||
|
||||
(define_automaton "sh4a")
|
||||
|
||||
(define_cpu_unit "sh4a_ex" "sh4a")
|
||||
(define_cpu_unit "sh4a_ls" "sh4a")
|
||||
(define_cpu_unit "sh4a_fex" "sh4a")
|
||||
(define_cpu_unit "sh4a_fls" "sh4a")
|
||||
(define_cpu_unit "sh4a_mult" "sh4a")
|
||||
(define_cpu_unit "sh4a_fdiv" "sh4a")
|
||||
|
||||
;; Decoding is done on the integer pipeline like the
|
||||
;; sh4. Define issue to be the | of the two pipelines
|
||||
;; to control how often instructions are issued.
|
||||
(define_reservation "ID_or" "sh4a_ex|sh4a_ls")
|
||||
(define_reservation "ID_and" "sh4a_ex+sh4a_ls")
|
||||
|
||||
|
||||
;; =======================================================
|
||||
;; Locking Descriptions
|
||||
|
||||
;; Memory access on the LS pipeline.
|
||||
(define_cpu_unit "sh4a_memory" "sh4a")
|
||||
|
||||
;; Other access on the LS pipeline.
|
||||
(define_cpu_unit "sh4a_load_store" "sh4a")
|
||||
|
||||
;; The address calculator used for branch instructions.
|
||||
;; This will be reserved after "issue" of branch instructions
|
||||
;; and this is to make sure that no two branch instructions
|
||||
;; can be issued in parallel.
|
||||
(define_reservation "sh4a_addrcalc" "sh4a_ex")
|
||||
|
||||
;; =======================================================
|
||||
;; Reservations
|
||||
|
||||
;; Branch (BF,BF/S,BT,BT/S,BRA,BSR)
|
||||
;; Group: BR
|
||||
;; Latency when taken: 2
|
||||
(define_insn_reservation "sh4a_branch" 2
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "cbranch,jump"))
|
||||
"ID_or+sh4a_addrcalc")
|
||||
|
||||
;; Jump (JSR,JMP,RTS)
|
||||
;; Group: BR
|
||||
;; Latency: 3
|
||||
(define_insn_reservation "sh4a_jump" 3
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "return,jump_ind"))
|
||||
"ID_or+sh4a_addrcalc")
|
||||
|
||||
;; RTE
|
||||
;; Group: CO
|
||||
;; Latency: 3
|
||||
(define_insn_reservation "sh4a_rte" 3
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "rte"))
|
||||
"ID_and*4")
|
||||
|
||||
;; EX Group Single
|
||||
;; Group: EX
|
||||
;; Latency: 0
|
||||
(define_insn_reservation "sh4a_ex" 0
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "insn_class" "ex_group"))
|
||||
"sh4a_ex")
|
||||
|
||||
;; MOVA
|
||||
;; Group: LS
|
||||
;; Latency: 1
|
||||
(define_insn_reservation "sh4a_mova" 1
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "mova"))
|
||||
"sh4a_ls+sh4a_load_store")
|
||||
|
||||
;; MOV
|
||||
;; Group: MT
|
||||
;; Latency: 0
|
||||
(define_insn_reservation "sh4a_mov" 0
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "move"))
|
||||
"ID_or")
|
||||
|
||||
;; Load
|
||||
;; Group: LS
|
||||
;; Latency: 3
|
||||
(define_insn_reservation "sh4a_load" 3
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "load,pcload"))
|
||||
"sh4a_ls+sh4a_memory")
|
||||
|
||||
(define_insn_reservation "sh4a_load_si" 3
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "load_si,pcload_si"))
|
||||
"sh4a_ls+sh4a_memory")
|
||||
|
||||
;; Store
|
||||
;; Group: LS
|
||||
;; Latency: 0
|
||||
(define_insn_reservation "sh4a_store" 0
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "store"))
|
||||
"sh4a_ls+sh4a_memory")
|
||||
|
||||
;; CWB TYPE
|
||||
|
||||
;; MOVUA
|
||||
;; Group: LS
|
||||
;; Latency: 3
|
||||
(define_insn_reservation "sh4a_movua" 3
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "movua"))
|
||||
"sh4a_ls+sh4a_memory*2")
|
||||
|
||||
;; Fixed point multiplication (single)
|
||||
;; Group: CO
|
||||
;; Latency: 2
|
||||
(define_insn_reservation "sh4a_smult" 2
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "smpy"))
|
||||
"ID_or+sh4a_mult")
|
||||
|
||||
;; Fixed point multiplication (double)
|
||||
;; Group: CO
|
||||
;; Latency: 3
|
||||
(define_insn_reservation "sh4a_dmult" 3
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "dmpy"))
|
||||
"ID_or+sh4a_mult")
|
||||
|
||||
(define_insn_reservation "sh4a_mac_gp" 3
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "mac_gp"))
|
||||
"ID_and")
|
||||
|
||||
;; Other MT group instructions (1 step operations)
|
||||
;; Group: MT
|
||||
;; Latency: 1
|
||||
(define_insn_reservation "sh4a_mt" 1
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "mt_group"))
|
||||
"ID_or")
|
||||
|
||||
;; Floating point reg move
|
||||
;; Group: LS
|
||||
;; Latency: 2
|
||||
(define_insn_reservation "sh4a_freg_mov" 2
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "fmove"))
|
||||
"sh4a_ls,sh4a_fls")
|
||||
|
||||
;; Single precision floating point computation FCMP/EQ,
|
||||
;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG
|
||||
;; Group: FE
|
||||
;; Latency: 3
|
||||
(define_insn_reservation "sh4a_fp_arith" 3
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "fp"))
|
||||
"ID_or,sh4a_fex")
|
||||
|
||||
(define_insn_reservation "sh4a_fp_arith_ftrc" 3
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "ftrc_s"))
|
||||
"ID_or,sh4a_fex")
|
||||
|
||||
;; Single-precision FDIV/FSQRT
|
||||
;; Group: FE
|
||||
;; Latency: 20
|
||||
(define_insn_reservation "sh4a_fdiv" 20
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "fdiv"))
|
||||
"ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex")
|
||||
|
||||
;; Double Precision floating point computation
|
||||
;; (FCNVDS, FCNVSD, FLOAT, FTRC)
|
||||
;; Group: FE
|
||||
;; Latency: 3
|
||||
(define_insn_reservation "sh4a_dp_float" 3
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "dfp_conv"))
|
||||
"ID_or,sh4a_fex")
|
||||
|
||||
;; Double-precision floating-point (FADD,FMUL,FSUB)
|
||||
;; Group: FE
|
||||
;; Latency: 5
|
||||
(define_insn_reservation "sh4a_fp_double_arith" 5
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "dfp_arith"))
|
||||
"ID_or,sh4a_fex*3")
|
||||
|
||||
;; Double precision FDIV/SQRT
|
||||
;; Group: FE
|
||||
;; Latency: 36
|
||||
(define_insn_reservation "sh4a_dp_div" 36
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "dfdiv"))
|
||||
"ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex*2")
|
||||
|
||||
;; FSRRA
|
||||
;; Group: FE
|
||||
;; Latency: 5
|
||||
(define_insn_reservation "sh4a_fsrra" 5
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "fsrra"))
|
||||
"ID_or,sh4a_fex")
|
||||
|
||||
;; FSCA
|
||||
;; Group: FE
|
||||
;; Latency: 7
|
||||
(define_insn_reservation "sh4a_fsca" 7
|
||||
(and (eq_attr "cpu" "sh4a")
|
||||
(eq_attr "type" "fsca"))
|
||||
"ID_or,sh4a_fex*3")
|
1
gcc/config/sh/t-mlib-sh4a
Normal file
1
gcc/config/sh/t-mlib-sh4a
Normal file
@ -0,0 +1 @@
|
||||
ML_sh4a=m4a/
|
1
gcc/config/sh/t-mlib-sh4a-nofpu
Normal file
1
gcc/config/sh/t-mlib-sh4a-nofpu
Normal file
@ -0,0 +1 @@
|
||||
ML_sh4a_nofpu=m4a-nofpu/
|
1
gcc/config/sh/t-mlib-sh4a-single
Normal file
1
gcc/config/sh/t-mlib-sh4a-single
Normal file
@ -0,0 +1 @@
|
||||
ML_sh4a_single=m4a-single/
|
1
gcc/config/sh/t-mlib-sh4a-single-only
Normal file
1
gcc/config/sh/t-mlib-sh4a-single-only
Normal file
@ -0,0 +1 @@
|
||||
ML_sh4a_single_only=m4a-single-only/
|
1
gcc/config/sh/t-mlib-sh4al
Normal file
1
gcc/config/sh/t-mlib-sh4al
Normal file
@ -0,0 +1 @@
|
||||
ML_sh4al=m4al/
|
@ -22,15 +22,16 @@ fp-bit.c: $(srcdir)/config/fp-bit.c
|
||||
cat $(srcdir)/config/fp-bit.c >> fp-bit.c
|
||||
|
||||
MULTILIB_ENDIAN = ml/mb
|
||||
MULTILIB_CPUS= $(ML_sh1)$(ML_sh2e)$(ML_sh2)$(ML_sh3e)$(ML_sh3)$(ML_sh4_nofpu)$(ML_sh4_single_only)$(ML_sh4_single)$(ML_sh4)$(ML_m5_32media)$(ML_m5_32media_nofpu)$(ML_m5_compact)$(ML_m5_compact_nofpu)$(ML_m5_64media)$(ML_m5_64media_nofpu)
|
||||
MULTILIB_CPUS= $(ML_sh1)$(ML_sh2e)$(ML_sh2)$(ML_sh3e)$(ML_sh3)$(ML_sh4_nofpu)$(ML_sh4_single_only)$(ML_sh4_single)$(ML_sh4)$(ML_sh4a_nofpu)$(ML_sh4a_single_only)$(ML_sh4a_single)$(ML_sh4a)$(ML_m5_32media)$(ML_m5_32media_nofpu)$(ML_m5_compact)$(ML_m5_compact_nofpu)$(ML_m5_64media)$(ML_m5_64media_nofpu)
|
||||
|
||||
MULTILIB_OPTIONS= $(MULTILIB_ENDIAN) $(MULTILIB_CPUS:/=)
|
||||
MULTILIB_DIRNAMES=
|
||||
#MULTILIB_MATCHES = m2=m3 m2e=m3e m2=m4-nofpu
|
||||
MULTILIB_MATCHES = $(shell \
|
||||
multilibs="$(MULTILIB_OPTIONS)" ; \
|
||||
for abi in m1,m2,m3,m4-nofpu \
|
||||
m2e,m3e,m4-single-only \
|
||||
for abi in m1,m2,m3,m4-nofpu,m4al,m4a-nofpu \
|
||||
m2e,m3e,m4-single-only,m4a-single-only \
|
||||
m4-single,m4a-single m4,m4a \
|
||||
m5-32media,m5-compact,m5-32media \
|
||||
m5-32media-nofpu,m5-compact-nofpu,m5-32media-nofpu; do \
|
||||
subst= ; \
|
||||
|
@ -621,6 +621,7 @@ See RS/6000 and PowerPC Options.
|
||||
@emph{SH Options}
|
||||
@gccoptlist{-m1 -m2 -m2e -m3 -m3e @gol
|
||||
-m4-nofpu -m4-single-only -m4-single -m4 @gol
|
||||
-m4a-nofpu -m4a-single-only -m4a-single -m4a -m4al @gol
|
||||
-m5-64media -m5-64media-nofpu @gol
|
||||
-m5-32media -m5-32media-nofpu @gol
|
||||
-m5-compact -m5-compact-nofpu @gol
|
||||
@ -10518,6 +10519,31 @@ single-precision mode by default.
|
||||
@opindex m4
|
||||
Generate code for the SH4.
|
||||
|
||||
@item -m4a-nofpu
|
||||
@opindex m4a-nofpu
|
||||
Generate code for the SH4al-dsp, or for an SH4a in such a way that the
|
||||
floating-point unit is not used.
|
||||
|
||||
@item -m4a-single-only
|
||||
@opindex m4a-single-only
|
||||
Generate code for the SH4a, in such a way that no double-precision
|
||||
floating point operations are used.
|
||||
|
||||
@item -m4a-single
|
||||
@opindex m4a-single
|
||||
Generate code for the SH4a assuming the floating-point unit is in
|
||||
single-precision mode by default.
|
||||
|
||||
@item -m4a
|
||||
@opindex m4a
|
||||
Generate code for the SH4a.
|
||||
|
||||
@item -m4al
|
||||
@opindex m4al
|
||||
Same as @option{-m4a-nofpu}, except that it implicitly passes
|
||||
@option{-dsp} to the assembler. GCC doesn't generate any DSP
|
||||
instructions at the moment.
|
||||
|
||||
@item -mb
|
||||
@opindex mb
|
||||
Compile code for the processor in big endian mode.
|
||||
|
@ -1,3 +1,19 @@
|
||||
2004-07-28 Alexandre Oliva <aoliva@redhat.com>
|
||||
|
||||
2003-07-06 Alexandre Oliva <aoliva@redhat.com>
|
||||
* gcc.dg/sh4a-memmovua.c: Tweak regular expression.
|
||||
2003-07-01 Alexandre Oliva <aoliva@redhat.com>
|
||||
* gcc.dg/sh4a-bitmovua.c: New.
|
||||
* gcc.dg/sh4a-cos.c: New.
|
||||
* gcc.dg/sh4a-cosf.c: New.
|
||||
* gcc.dg/sh4a-fprun.c: New.
|
||||
* gcc.dg/sh4a-fsrra.c: New.
|
||||
* gcc.dg/sh4a-memmovua.c: New.
|
||||
* gcc.dg/sh4a-sin.c: New.
|
||||
* gcc.dg/sh4a-sincos.c: New.
|
||||
* gcc.dg/sh4a-sincosf.c: New.
|
||||
* gcc.dg/sh4a-sinf.c: New.
|
||||
|
||||
2004-07-28 Diego Novillo <dnovillo@redhat.com>
|
||||
|
||||
* gcc.dg/tree-ssa/20030714-2.c: Adjust number of expected
|
||||
|
73
gcc/testsuite/gcc.dg/sh4a-bitmovua.c
Normal file
73
gcc/testsuite/gcc.dg/sh4a-bitmovua.c
Normal file
@ -0,0 +1,73 @@
|
||||
/* Verify that we generate movua to load unaligned 32-bit values. */
|
||||
/* { dg-do compile { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O" } */
|
||||
/* { dg-final { scan-assembler-times "\tmovua\\.l\t" 6 } } */
|
||||
|
||||
#ifdef __SH4A__
|
||||
/* Aligned. */
|
||||
struct s0 { long long d : 32; } x0;
|
||||
long long f0() {
|
||||
return x0.d;
|
||||
}
|
||||
|
||||
/* Unaligned load. */
|
||||
struct s1 { long long c : 8; long long d : 32; } x1;
|
||||
long long f1() {
|
||||
return x1.d;
|
||||
}
|
||||
|
||||
/* Unaligned load. */
|
||||
struct s2 { long long c : 16; long long d : 32; } x2;
|
||||
long long f2() {
|
||||
return x2.d;
|
||||
}
|
||||
|
||||
/* Unaligned load. */
|
||||
struct s3 { long long c : 24; long long d : 32; } x3;
|
||||
long long f3() {
|
||||
return x3.d;
|
||||
}
|
||||
|
||||
/* Aligned. */
|
||||
struct s4 { long long c : 32; long long d : 32; } x4;
|
||||
long long f4() {
|
||||
return x4.d;
|
||||
}
|
||||
|
||||
/* Aligned. */
|
||||
struct u0 { unsigned long long d : 32; } y0;
|
||||
unsigned long long g0() {
|
||||
return y0.d;
|
||||
}
|
||||
|
||||
/* Unaligned load. */
|
||||
struct u1 { long long c : 8; unsigned long long d : 32; } y1;
|
||||
unsigned long long g1() {
|
||||
return y1.d;
|
||||
}
|
||||
|
||||
/* Unaligned load. */
|
||||
struct u2 { long long c : 16; unsigned long long d : 32; } y2;
|
||||
unsigned long long g2() {
|
||||
return y2.d;
|
||||
}
|
||||
|
||||
/* Unaligned load. */
|
||||
struct u3 { long long c : 24; unsigned long long d : 32; } y3;
|
||||
unsigned long long g3() {
|
||||
return y3.d;
|
||||
}
|
||||
|
||||
/* Aligned. */
|
||||
struct u4 { long long c : 32; unsigned long long d : 32; } y4;
|
||||
unsigned long long g4() {
|
||||
return y4.d;
|
||||
}
|
||||
#else
|
||||
asm ("movua.l\t");
|
||||
asm ("movua.l\t");
|
||||
asm ("movua.l\t");
|
||||
asm ("movua.l\t");
|
||||
asm ("movua.l\t");
|
||||
asm ("movua.l\t");
|
||||
#endif
|
13
gcc/testsuite/gcc.dg/sh4a-cos.c
Normal file
13
gcc/testsuite/gcc.dg/sh4a-cos.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* Verify that we generate single-precision sine and cosine approximate
|
||||
(fsca) in fast math mode. */
|
||||
/* { dg-do compile { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O -ffast-math" } */
|
||||
/* { dg-final { scan-assembler "\tfsca\t" } } */
|
||||
|
||||
#if defined __SH4A__ && ! defined __SH4_NOFPU__
|
||||
#include <math.h>
|
||||
|
||||
double test(double f) { return cos(f); }
|
||||
#else
|
||||
asm ("fsca\t");
|
||||
#endif
|
13
gcc/testsuite/gcc.dg/sh4a-cosf.c
Normal file
13
gcc/testsuite/gcc.dg/sh4a-cosf.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* Verify that we generate single-precision sine and cosine approximate
|
||||
(fsca) in fast math mode. */
|
||||
/* { dg-do compile { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O -ffast-math" } */
|
||||
/* { dg-final { scan-assembler "\tfsca\t" } } */
|
||||
|
||||
#if defined __SH4A__ && ! defined __SH4_NOFPU__
|
||||
#include <math.h>
|
||||
|
||||
float test(float f) { return cosf(f); }
|
||||
#else
|
||||
asm ("fsca\t");
|
||||
#endif
|
35
gcc/testsuite/gcc.dg/sh4a-fprun.c
Normal file
35
gcc/testsuite/gcc.dg/sh4a-fprun.c
Normal file
@ -0,0 +1,35 @@
|
||||
/* Verify that fsca and fsrra yield reasonable results. */
|
||||
/* { dg-do run { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O -ffast-math" } */
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
float sqrt_arg = 4.0f, sqrt_res = 2.0f;
|
||||
float dg2rad_f;
|
||||
double dg2rad_d;
|
||||
|
||||
/* Abort unless RES lies within 0.001 of EXPECTED (bounds inclusive).
   A NaN RES fails both comparisons and therefore aborts as well.  */
void check_f (float res, float expected) {
  if (res >= expected - 0.001f && res <= expected + 0.001f)
    return;

  abort ();
}
|
||||
|
||||
/* Abort unless RES lies within 0.001 of EXPECTED (bounds inclusive).
   A NaN RES fails both comparisons and therefore aborts as well.  */
void check_d (double res, double expected) {
  if (res >= expected - 0.001 && res <= expected + 0.001)
    return;

  abort ();
}
|
||||
|
||||
int main() {
|
||||
check_f (sqrtf(sqrt_arg), sqrt_res);
|
||||
dg2rad_f = dg2rad_d = atan(1) / 45;
|
||||
check_f (sinf(90*dg2rad_f), 1);
|
||||
check_f (cosf(90*dg2rad_f), 0);
|
||||
check_d (sin(-90*dg2rad_d), -1);
|
||||
check_d (cos(180*dg2rad_d), -1);
|
||||
check_d (sin(-45*dg2rad_d) * cosf(135*dg2rad_f), 0.5);
|
||||
exit (0);
|
||||
}
|
13
gcc/testsuite/gcc.dg/sh4a-fsrra.c
Normal file
13
gcc/testsuite/gcc.dg/sh4a-fsrra.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* Verify that we generate single-precision square root reciprocal
|
||||
approximate (fsrra) in fast math mode. */
|
||||
/* { dg-do compile { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O -ffast-math" } */
|
||||
/* { dg-final { scan-assembler "\tfsrra\t" } } */
|
||||
|
||||
#if defined __SH4A__ && ! defined __SH4_NOFPU__
|
||||
#include <math.h>
|
||||
|
||||
float test(float f) { return 1 / sqrtf(f); }
|
||||
#else
|
||||
asm ("fsrra\t");
|
||||
#endif
|
17
gcc/testsuite/gcc.dg/sh4a-memmovua.c
Normal file
17
gcc/testsuite/gcc.dg/sh4a-memmovua.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* Verify that we generate movua to copy unaligned memory regions to
|
||||
32-bit-aligned addresses. */
|
||||
/* { dg-do compile { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O" } */
|
||||
/* { dg-final { scan-assembler-times "\tmovua\\.l\t(.*)+" 2 } } */
|
||||
|
||||
#ifdef __SH4A__
|
||||
#include <stdlib.h>
#include <string.h>
|
||||
|
||||
struct s { int i; char a[10], b[10]; } x;
|
||||
int f() {
|
||||
memcpy(x.a, x.b, 10);
|
||||
}
|
||||
#else
|
||||
asm ("movua.l\t+");
|
||||
asm ("movua.l\t+");
|
||||
#endif
|
13
gcc/testsuite/gcc.dg/sh4a-sin.c
Normal file
13
gcc/testsuite/gcc.dg/sh4a-sin.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* Verify that we generate single-precision sine and cosine approximate
|
||||
(fsca) in fast math mode. */
|
||||
/* { dg-do compile { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O -ffast-math" } */
|
||||
/* { dg-final { scan-assembler "\tfsca\t" } } */
|
||||
|
||||
#if defined __SH4A__ && ! defined __SH4_NOFPU__
|
||||
#include <math.h>
|
||||
|
||||
double test(double f) { return sin(f); }
|
||||
#else
|
||||
asm ("fsca\t");
|
||||
#endif
|
14
gcc/testsuite/gcc.dg/sh4a-sincos.c
Normal file
14
gcc/testsuite/gcc.dg/sh4a-sincos.c
Normal file
@ -0,0 +1,14 @@
|
||||
/* Verify that we generate a single single-precision sine and cosine
|
||||
approximate (fsca) in fast math mode when a function computes both
|
||||
sine and cosine. */
|
||||
/* { dg-do compile { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O -ffast-math" } */
|
||||
/* { dg-final { scan-assembler-times "\tfsca\t" 1 } } */
|
||||
|
||||
#if defined __SH4A__ && ! defined __SH4_NOFPU__
|
||||
#include <math.h>
|
||||
|
||||
double test(double f) { return sin(f) + cos(f); }
|
||||
#else
|
||||
asm ("fsca\t");
|
||||
#endif
|
14
gcc/testsuite/gcc.dg/sh4a-sincosf.c
Normal file
14
gcc/testsuite/gcc.dg/sh4a-sincosf.c
Normal file
@ -0,0 +1,14 @@
|
||||
/* Verify that we generate a single single-precision sine and cosine
|
||||
approximate (fsca) in fast math mode when a function computes both
|
||||
sine and cosine. */
|
||||
/* { dg-do compile { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O -ffast-math" } */
|
||||
/* { dg-final { scan-assembler-times "\tfsca\t" 1 } } */
|
||||
|
||||
#if defined __SH4A__ && ! defined __SH4_NOFPU__
|
||||
#include <math.h>
|
||||
|
||||
float test(float f) { return sinf(f) + cosf(f); }
|
||||
#else
|
||||
asm ("fsca\t");
|
||||
#endif
|
13
gcc/testsuite/gcc.dg/sh4a-sinf.c
Normal file
13
gcc/testsuite/gcc.dg/sh4a-sinf.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* Verify that we generate single-precision sine and cosine approximate
|
||||
(fsca) in fast math mode. */
|
||||
/* { dg-do compile { target "sh*-*-*" } } */
|
||||
/* { dg-options "-O -ffast-math" } */
|
||||
/* { dg-final { scan-assembler "\tfsca\t" } } */
|
||||
|
||||
#if defined __SH4A__ && ! defined __SH4_NOFPU__
|
||||
#include <math.h>
|
||||
|
||||
float test(float f) { return sinf(f); }
|
||||
#else
|
||||
asm ("fsca\t");
|
||||
#endif
|
@ -1,3 +1,11 @@
|
||||
2004-07-28 Alexandre Oliva <aoliva@redhat.com>
|
||||
|
||||
2003-10-01 Eric Christopher <echristo@redhat.com>
|
||||
* config/cpu/sh/atomicity.h (__exchange_and_add): Remove 'm'
|
||||
constraint.
|
||||
2003-07-09 Alexandre Oliva <aoliva@redhat.com>
|
||||
* config/cpu/sh/atomicity.h: New. Use movli and movco on SH4a.
|
||||
|
||||
2004-07-23 Benjamin Kosnik <bkoz@redhat.com>
|
||||
|
||||
PR libstdc++/16678
|
||||
|
123
libstdc++-v3/config/cpu/sh/atomicity.h
Normal file
123
libstdc++-v3/config/cpu/sh/atomicity.h
Normal file
@ -0,0 +1,123 @@
|
||||
// Low-level functions for atomic operations: SH version  -*- C++ -*-
|
||||
|
||||
// Copyright (C) 1999, 2001, 2002, 2003 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 2, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING. If not, write to the Free
|
||||
// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
// USA.
|
||||
|
||||
// As a special exception, you may use this file as part of a free software
|
||||
// library without restriction. Specifically, if other files instantiate
|
||||
// templates or use macros or inline functions from this file, or you compile
|
||||
// this file and link it with other files to produce an executable, this
|
||||
// file does not by itself cause the resulting executable to be covered by
|
||||
// the GNU General Public License. This exception does not however
|
||||
// invalidate any other reasons why the executable file might be covered by
|
||||
// the GNU General Public License.
|
||||
|
||||
#ifndef _BITS_ATOMICITY_H
|
||||
#define _BITS_ATOMICITY_H 1
|
||||
|
||||
#ifdef __SH4A__
|
||||
|
||||
typedef int _Atomic_word;
|
||||
|
||||
// Atomically add __val to *__mem and return the value *__mem held before
// the addition.  Implemented as a movli.l / movco.l loop on r0 that
// branches back (bf 0b) until the conditional store goes through.
static inline _Atomic_word
__attribute__ ((__unused__))
__exchange_and_add (volatile _Atomic_word* __mem, int __val)
{
  _Atomic_word __result;

  // %1 (__result) is written by "mov r0,%1" before %3 (__val) is read by
  // "add %3,r0" and before the final use of %2, so it must not share a
  // register with any input: hence the earlyclobber "=&r".  The loop
  // branches on the T bit, so "t" is listed as clobbered next to r0.
  __asm__ __volatile__
    ("0:\n"
     "\tmovli.l\t@%2,r0\n"
     "\tmov\tr0,%1\n"
     "\tadd\t%3,r0\n"
     "\tmovco.l\tr0,@%2\n"
     "\tbf\t0b"
     : "+m" (*__mem), "=&r" (__result)
     : "r" (__mem), "rI08" (__val)
     : "r0", "t");

  return __result;
}
|
||||
|
||||
|
||||
static inline void
|
||||
__attribute__ ((__unused__))
|
||||
__atomic_add (volatile _Atomic_word* __mem, int __val)
|
||||
{
|
||||
asm("0:\n"
|
||||
"\tmovli.l\t@%1,r0\n"
|
||||
"\tadd\t%2,r0\n"
|
||||
"\tmovco.l\tr0,@%1\n"
|
||||
"\tbf\t0b"
|
||||
: "+m" (*__mem)
|
||||
: "r" (__mem), "rI08" (__val)
|
||||
: "r0");
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* This is generic/atomicity.h */
|
||||
|
||||
#include <bits/gthr.h>
|
||||
|
||||
#define _GLIBCPP_NEED_GENERIC_MUTEX
|
||||
|
||||
typedef int _Atomic_word;
|
||||
|
||||
namespace __gnu_cxx
|
||||
{
|
||||
extern __gthread_mutex_t _Atomic_add_mutex;
|
||||
|
||||
#ifndef __GTHREAD_MUTEX_INIT
|
||||
extern __gthread_once_t _Atomic_add_mutex_once;
|
||||
extern void __gthread_atomic_add_mutex_once();
|
||||
#endif
|
||||
}
|
||||
|
||||
// Generic fallback: add __val to *__mem while holding the global
// __gnu_cxx::_Atomic_add_mutex and return the value *__mem held before the
// addition.
static inline _Atomic_word
__attribute__ ((__unused__))
__exchange_and_add (volatile _Atomic_word* __mem, int __val)
{
#ifndef __GTHREAD_MUTEX_INIT
  // No static initializer is available for the mutex, so make sure the
  // one-time initialization routine has run before locking it.
  __gthread_once (&__gnu_cxx::_Atomic_add_mutex_once,
                  __gnu_cxx::__gthread_atomic_add_mutex_once);
#endif

  _Atomic_word __result;

  __gthread_mutex_lock (&__gnu_cxx::_Atomic_add_mutex);

  // Read-modify-write under the lock.
  __result = *__mem;
  *__mem += __val;

  __gthread_mutex_unlock (&__gnu_cxx::_Atomic_add_mutex);
  return __result;
}
|
||||
|
||||
|
||||
static inline void
|
||||
__attribute__ ((__unused__))
|
||||
__atomic_add (volatile _Atomic_word* __mem, int __val)
|
||||
{
|
||||
(void) __exchange_and_add (__mem, __val);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* atomicity.h */
|
Loading…
x
Reference in New Issue
Block a user