mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-15 14:01:04 +08:00
MSP430: Add 64-bit hardware multiply support
Hardware multipliers that support widening 32-bit multiplication can be used to perform a 64-bit * 64-bit multiplication more efficiently than a software implementation. The following equation is used to perform 64-bit multiplication for devices with "32bit" or "f5series" hardware multiply versions: 64bit_result = (low32_op0 * low32_op1) + ((low32_op0 * high32_op1) << 32) + ((high32_op0 * low32_op1) << 32) libgcc/ChangeLog: * config/msp430/lib2hw_mul.S (mult64_hw): New. (if MUL_32): Use mult64_hw for __muldi3. (if MUL_F5): Use mult64_hw for __muldi3. * config/msp430/lib2mul.c (__muldi3): New. * config/msp430/t-msp430 (LIB2FUNCS_EXCLUDE): Define.
This commit is contained in:
parent
e045b85836
commit
bf7b94d407
@ -207,6 +207,73 @@
|
||||
MOV.W &\RES3, R15 ; Ready high 16-bits for return
|
||||
.endm
|
||||
|
||||
.macro mult64_hw MPY32_LO MPY32_HI OP2_LO OP2_HI RES0 RES1 RES2 RES3
;* * 64-bit hardware multiply with a 64-bit result
;*	int64 = int64 * int64
;*
;*   - Operand 1 is in R8, R9, R10, R11
;*   - Operand 2 is in R12, R13, R14, R15
;*   - Result    is in R12, R13, R14, R15
;*
;* 64-bit multiplication is achieved using the 32-bit hardware multiplier with
;* the following equation:
;*    R12:R15 = (R8:R9 * R12:R13) + ((R8:R9 * R14:R15) << 32) + ((R10:R11 * R12:R13) << 32)
;*
;* The left shift by 32 is handled with minimal cost by saving the two low
;* words and discarding the two high words.
;*
;* To ensure that the multiply is performed atomically, interrupts are
;* disabled for as long as the multiplier registers are in use.  The saved
;* interrupt state is restored once the last result word has been read.
;*
;*   Registers used:  R6, R7, R8, R9, R10, R11, R12, R13, R14, R15
;*   R6 to R10 are pushed on entry and popped again on exit.
;*
;* Macro arguments are the memory locations of the hardware registers.
;*
#if defined(__MSP430X_LARGE__)
	PUSHM.A	#5, R10
#elif defined(__MSP430X__)
	PUSHM.W	#5, R10
#else
	PUSH R10 { PUSH R9 { PUSH R8 { PUSH R7 { PUSH R6
#endif
	; The three multiplies below share one set of operand and result
	; registers, so an interrupt handler that also multiplies must not
	; run until the final result word has been read back.
	PUSH.W	sr			; Save interrupt state
	DINT				; Disable interrupts
	NOP				; Account for latency
	; Multiply the low 32-bits of op0 and the high 32-bits of op1.
	MOV.W	R8, &\MPY32_LO
	MOV.W	R9, &\MPY32_HI
	MOV.W	R14, &\OP2_LO
	MOV.W	R15, &\OP2_HI
	; Save the low 32-bits of the result.
	MOV.W	&\RES0, R6
	MOV.W	&\RES1, R7
	; Multiply the high 32-bits of op0 and the low 32-bits of op1.
	MOV.W	R10, &\MPY32_LO
	MOV.W	R11, &\MPY32_HI
	MOV.W	R12, &\OP2_LO
	MOV.W	R13, &\OP2_HI
	; Add the low 32-bits of the result to the previously saved result.
	ADD.W	&\RES0, R6
	ADDC.W	&\RES1, R7
	; Multiply the low 32-bits of op0 and op1.
	MOV.W	R8, &\MPY32_LO
	MOV.W	R9, &\MPY32_HI
	MOV.W	R12, &\OP2_LO
	MOV.W	R13, &\OP2_HI
	; Write the return values
	MOV.W	&\RES0, R12
	MOV.W	&\RES1, R13
	MOV.W	&\RES2, R14
	MOV.W	&\RES3, R15
	; The multiplier is no longer needed; everything below works on
	; CPU registers only.
	POP.W	sr			; Restore interrupt state
	; Add the saved low 32-bit results from earlier to the high 32-bits of
	; this result, effectively shifting those two results left by 32 bits.
	ADD.W	R6, R14
	ADDC.W	R7, R15
#if defined(__MSP430X_LARGE__)
	POPM.A	#5, R10
#elif defined(__MSP430X__)
	POPM.W	#5, R10
#else
	POP R6 { POP R7 { POP R8 { POP R9 { POP R10
#endif
.endm
|
||||
|
||||
;; EABI mandated names:
|
||||
;;
|
||||
@ -365,8 +432,9 @@
|
||||
mult3264_hw MPY32L, MPY32H, OP2L, OP2H, RES0, RES1, RES2, RES3
|
||||
end_func __umulsidi2
|
||||
|
||||
;; FIXME: Add a hardware version of this function.
|
||||
fake_func __muldi3 __mspabi_mpyll __mspabi_mpyll_hw32
|
||||
/* Hardware-multiplier version of __muldi3: the function body is the expansion
   of mult64_hw, given the MPY32L/MPY32H and OP2L/OP2H operand registers and
   the RES0-RES3 result registers.
   NOTE(review): start_func and end_func are defined outside this view;
   __mspabi_mpyll and __mspabi_mpyll_hw32 are presumably additional names for
   the same entry point -- confirm against the macro definitions.  */
start_func __muldi3 __mspabi_mpyll __mspabi_mpyll_hw32
|
||||
mult64_hw MPY32L, MPY32H, OP2L, OP2H, RES0, RES1, RES2, RES3
|
||||
end_func __muldi3
|
||||
|
||||
#elif defined MUL_F5
|
||||
/* The F5xxx series of MCUs support the same 16-bit and 32-bit multiply
|
||||
@ -397,8 +465,9 @@
|
||||
mult3264_hw MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
|
||||
end_func __umulsidi2
|
||||
|
||||
;; FIXME: Add a hardware version of this function.
|
||||
fake_func __muldi3 __mspabi_mpyll __mspabi_mpyll_f5hw
|
||||
/* Hardware-multiplier version of __muldi3 for the MUL_F5 configuration: the
   function body is the expansion of mult64_hw, given the *_F5 operand and
   result register addresses.
   NOTE(review): start_func and end_func are defined outside this view;
   __mspabi_mpyll and __mspabi_mpyll_f5hw are presumably additional names for
   the same entry point -- confirm against the macro definitions.  */
start_func __muldi3 __mspabi_mpyll __mspabi_mpyll_f5hw
|
||||
mult64_hw MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
|
||||
end_func __muldi3
|
||||
|
||||
#else
|
||||
#error MUL type not defined
|
||||
|
@ -30,6 +30,58 @@ typedef unsigned int uint08_type __attribute__ ((mode (QI)));
|
||||
#define C3B(a,b,c) a##b##c
|
||||
#define C3(a,b,c) C3B(a,b,c)
|
||||
|
||||
#if defined (MUL_NONE) || defined (MUL_16)
|
||||
/* __muldi3 must be excluded from libgcc.a to prevent multiple-definition
|
||||
errors for the hwmult configurations that have their own definition.
|
||||
However, for MUL_NONE and MUL_16, the software version is still required, so
|
||||
the necessary preprocessed output from libgcc2.c to compile that
|
||||
software version of __muldi3 is below. */
|
||||
typedef unsigned int USItype __attribute__ ((mode (SI)));
typedef int DItype __attribute__ ((mode (DI)));
typedef int SItype __attribute__ ((mode (SI)));

/* The two SItype halves that overlay a DItype inside DWunion.  The multiply
   below treats `low' as the least-significant half.  */
struct DWstruct
{
  SItype low, high;
};

/* Gives access to a DItype either as a whole or as its two halves.  */
typedef union
{
  struct DWstruct s;
  DItype ll;
} DWunion;

DItype __muldi3 (DItype u, DItype v);

/* Software multiply: return the low 64 bits of U * V.

   The low halves of U and V are multiplied into a full 64-bit product using
   four partial products of half-width digits.  The two cross terms
   (low half of one operand times high half of the other) only affect the
   upper half of the result, so they are added there with wrap-around.  */
DItype
__muldi3 (DItype u, DItype v)
{
  /* Width of one digit: half of a 4-byte, 8-bits-per-byte word.  */
  enum { HALF_BITS = (4 * 8) / 2 };
  const USItype half_mask = ((USItype) 1 << HALF_BITS) - 1;

  DWunion lhs, rhs, product;
  lhs.ll = u;
  rhs.ll = v;

  const USItype u_lo = (USItype) lhs.s.low;
  const USItype u_hi = (USItype) lhs.s.high;
  const USItype v_lo = (USItype) rhs.s.low;
  const USItype v_hi = (USItype) rhs.s.high;

  /* Split the low halves into half-width digits.  */
  const USItype u_d0 = u_lo & half_mask;
  const USItype u_d1 = u_lo >> HALF_BITS;
  const USItype v_d0 = v_lo & half_mask;
  const USItype v_d1 = v_lo >> HALF_BITS;

  /* Partial products of the digits.  */
  const USItype p00 = u_d0 * v_d0;
  const USItype p10 = u_d1 * v_d0;
  USItype middle = u_d0 * v_d1;
  USItype upper = u_d1 * v_d1;

  /* Accumulate the middle column.  Only the second addition can wrap, and
     a wrap is worth one unit in the upper digit of the top word.  */
  middle += p00 >> HALF_BITS;
  middle += p10;
  if (middle < p10)
    upper += (USItype) 1 << HALF_BITS;
  upper += middle >> HALF_BITS;

  /* Cross terms between the low and high halves of the operands.  */
  upper += u_lo * v_hi + u_hi * v_lo;

  /* Shifting the middle column left keeps exactly its low digit.  */
  product.s.low = (middle << HALF_BITS) + (p00 & half_mask);
  product.s.high = upper;
  return product.ll;
}
|
||||
#endif
|
||||
|
||||
#if defined MUL_NONE
|
||||
|
||||
/* The software multiply library needs __mspabi_mpyll. */
|
||||
|
@ -40,6 +40,11 @@ LIB2ADD = \
|
||||
$(srcdir)/config/msp430/floathisf.c \
|
||||
$(srcdir)/config/msp430/cmpd.c
|
||||
|
||||
# 32-bit and F5series hardware multiply have their own version of this function.
|
||||
# To handle the case when there is no hardware multiply or only 16-bit hardware
|
||||
# multiply, the libgcc version has been copied to lib2mul.c.
|
||||
LIB2FUNCS_EXCLUDE += _muldi3
|
||||
|
||||
HOST_LIBGCC2_CFLAGS += -Os -ffunction-sections -fdata-sections -mhwmult=none
|
||||
|
||||
crtbegin_no_eh.o: $(srcdir)/crtstuff.c
|
||||
|
Loading…
x
Reference in New Issue
Block a user