mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-23 01:50:33 +08:00
re PR target/49313 (Inefficient libgcc implementations for avr)
PR target/49313 * config/avr/t-avr (LIB2FUNCS_EXCLUDE): Add _moddi3, _umoddi3. (LIB1ASMFUNCS): Add _divdi3, _udivdi3, _udivmod64, _negdi2. * config/avr/lib1funcs.S (wmov): New assembler macro. (__umoddi3, __udivdi3, __udivdi3_umoddi3): New functions. (__moddi3, __divdi3, __divdi3_moddi3): New functions. (__udivmod64): New function. (__negdi2): New function. From-SVN: r181551
This commit is contained in:
parent
515a0cfc80
commit
989bdb7461
@ -1,3 +1,14 @@
|
||||
2011-11-21 Georg-Johann Lay <avr@gjlay.de>
|
||||
|
||||
PR target/49313
|
||||
* config/avr/t-avr (LIB2FUNCS_EXCLUDE): Add _moddi3, _umoddi3.
|
||||
(LIB1ASMFUNCS): Add _divdi3, _udivdi3, _udivmod64, _negdi2.
|
||||
* config/avr/lib1funcs.S (wmov): New assembler macro.
|
||||
(__umoddi3, __udivdi3, __udivdi3_umoddi3): New functions.
|
||||
(__moddi3, __divdi3, __divdi3_moddi3): New functions.
|
||||
(__udivmod64): New function.
|
||||
(__negdi2): New function.
|
||||
|
||||
2011-11-21 Gerald Pfeifer <gerald@pfeifer.com>
|
||||
|
||||
* config.host (*-*-freebsd[12], *-*-freebsd[12].*,
|
||||
|
@ -61,6 +61,15 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro wmov r_dest, r_src
|
||||
#if defined (__AVR_HAVE_MOVW__)
|
||||
movw \r_dest, \r_src
|
||||
#else
|
||||
mov \r_dest, \r_src
|
||||
mov \r_dest+1, \r_src+1
|
||||
#endif
|
||||
.endm
|
||||
|
||||
#if defined (__AVR_HAVE_JMP_CALL__)
|
||||
#define XCALL call
|
||||
#define XJMP jmp
|
||||
@ -846,6 +855,352 @@ __divmodsi4_exit:
|
||||
ENDF __divmodsi4
|
||||
#endif /* defined (L_divmodsi4) */
|
||||
|
||||
|
||||
/*******************************************************
|
||||
Division 64 / 64
|
||||
Modulo 64 % 64
|
||||
*******************************************************/
|
||||
|
||||
;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
|
||||
;; at least 16k of Program Memory. For smaller Devices, depend
|
||||
;; on MOVW.
|
||||
|
||||
#if defined (__AVR_HAVE_JMP_CALL__)
|
||||
# define SPEED_DIV 8
|
||||
#elif defined (__AVR_HAVE_MOVW__)
|
||||
# define SPEED_DIV 16
|
||||
#else
|
||||
# define SPEED_DIV 0
|
||||
#endif
|
||||
|
||||
;; A[0..7]: In: Dividend;
|
||||
;; Out: Quotient (T = 0)
|
||||
;; Out: Remainder (T = 1)
|
||||
#define A0 18
|
||||
#define A1 A0+1
|
||||
#define A2 A0+2
|
||||
#define A3 A0+3
|
||||
#define A4 A0+4
|
||||
#define A5 A0+5
|
||||
#define A6 A0+6
|
||||
#define A7 A0+7
|
||||
|
||||
;; B[0..7]: In: Divisor; Out: Clobber
|
||||
#define B0 10
|
||||
#define B1 B0+1
|
||||
#define B2 B0+2
|
||||
#define B3 B0+3
|
||||
#define B4 B0+4
|
||||
#define B5 B0+5
|
||||
#define B6 B0+6
|
||||
#define B7 B0+7
|
||||
|
||||
;; C[0..7]: Expand remainder; Out: Remainder (unused)
|
||||
#define C0 8
|
||||
#define C1 C0+1
|
||||
#define C2 30
|
||||
#define C3 C2+1
|
||||
#define C4 28
|
||||
#define C5 C4+1
|
||||
#define C6 26
|
||||
#define C7 C6+1
|
||||
|
||||
;; Holds Signs during Division Routine
|
||||
#define SS __tmp_reg__
|
||||
|
||||
;; Bit-Counter in Division Routine
|
||||
#define R_cnt __zero_reg__
|
||||
|
||||
;; Scratch Register for Negation
|
||||
#define NN r31
|
||||
|
||||
#if defined (L_udivdi3)
|
||||
|
||||
;; R25:R18 = R24:R18 umod R17:R10
|
||||
;; Ordinary ABI-Function
|
||||
|
||||
DEFUN __umoddi3
|
||||
set
|
||||
rjmp __udivdi3_umoddi3
|
||||
ENDF __umoddi3
|
||||
|
||||
;; R25:R18 = R24:R18 udiv R17:R10
|
||||
;; Ordinary ABI-Function
|
||||
|
||||
DEFUN __udivdi3
|
||||
clt
|
||||
ENDF __udivdi3
|
||||
|
||||
DEFUN __udivdi3_umoddi3
|
||||
push C0
|
||||
push C1
|
||||
push C4
|
||||
push C5
|
||||
XCALL __udivmod64
|
||||
pop C5
|
||||
pop C4
|
||||
pop C1
|
||||
pop C0
|
||||
ret
|
||||
ENDF __udivdi3_umoddi3
|
||||
#endif /* L_udivdi3 */
|
||||
|
||||
#if defined (L_udivmod64)
|
||||
|
||||
;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
|
||||
;; No Registers saved/restored; the Callers will take Care.
|
||||
;; Preserves B[] and T-flag
|
||||
;; T = 0: Compute Quotient in A[]
|
||||
;; T = 1: Compute Remainder in A[] and shift SS one Bit left
|
||||
|
||||
DEFUN __udivmod64
|
||||
|
||||
;; Clear Remainder (C6, C7 will follow)
|
||||
clr C0
|
||||
clr C1
|
||||
wmov C2, C0
|
||||
wmov C4, C0
|
||||
ldi C7, 64
|
||||
|
||||
#if SPEED_DIV == 0 || SPEED_DIV == 16
|
||||
;; Initialize Loop-Counter
|
||||
mov R_cnt, C7
|
||||
wmov C6, C0
|
||||
#endif /* SPEED_DIV */
|
||||
|
||||
#if SPEED_DIV == 8
|
||||
|
||||
push A7
|
||||
clr C6
|
||||
|
||||
1: ;; Compare shifted Devidend against Divisor
|
||||
;; If -- even after Shifting -- it is smaller...
|
||||
CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
|
||||
cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
|
||||
brcc 2f
|
||||
|
||||
;; ...then we can subtract it. Thus, it is legal to shift left
|
||||
$ mov C6,C5 $ mov C5,C4 $ mov C4,C3
|
||||
mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
|
||||
mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
|
||||
mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
|
||||
|
||||
;; 8 Bits are done
|
||||
subi C7, 8
|
||||
brne 1b
|
||||
|
||||
;; Shifted 64 Bits: A7 has traveled to C7
|
||||
pop C7
|
||||
;; Divisor is greater than Dividend. We have:
|
||||
;; A[] % B[] = A[]
|
||||
;; A[] / B[] = 0
|
||||
;; Thus, we can return immediately
|
||||
rjmp 5f
|
||||
|
||||
2: ;; Initialze Bit-Counter with Number of Bits still to be performed
|
||||
mov R_cnt, C7
|
||||
|
||||
;; Push of A7 is not needed because C7 is still 0
|
||||
pop C7
|
||||
clr C7
|
||||
|
||||
#elif SPEED_DIV == 16
|
||||
|
||||
;; Compare shifted Dividend against Divisor
|
||||
cp A7, B3
|
||||
cpc C0, B4
|
||||
cpc C1, B5
|
||||
cpc C2, B6
|
||||
cpc C3, B7
|
||||
brcc 2f
|
||||
|
||||
;; Divisor is greater than shifted Dividen: We can shift the Dividend
|
||||
;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
|
||||
wmov C2,A6 $ wmov C0,A4
|
||||
wmov A6,A2 $ wmov A4,A0
|
||||
wmov A2,C6 $ wmov A0,C4
|
||||
|
||||
;; Set Bit Counter to 32
|
||||
lsr R_cnt
|
||||
2:
|
||||
#elif SPEED_DIV
|
||||
#error SPEED_DIV = ?
|
||||
#endif /* SPEED_DIV */
|
||||
|
||||
;; The very Division + Remainder Routine
|
||||
|
||||
3: ;; Left-shift Dividend...
|
||||
lsl A0 $ rol A1 $ rol A2 $ rol A3
|
||||
rol A4 $ rol A5 $ rol A6 $ rol A7
|
||||
|
||||
;; ...into Remainder
|
||||
rol C0 $ rol C1 $ rol C2 $ rol C3
|
||||
rol C4 $ rol C5 $ rol C6 $ rol C7
|
||||
|
||||
;; Compare Remainder and Divisor
|
||||
CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
|
||||
cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
|
||||
|
||||
brcs 4f
|
||||
|
||||
;; Divisor fits into Remainder: Subtract it from Remainder...
|
||||
SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
|
||||
sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
|
||||
|
||||
;; ...and set according Bit in the upcoming Quotient
|
||||
;; The Bit will travel to its final Position
|
||||
ori A0, 1
|
||||
|
||||
4: ;; This Bit is done
|
||||
dec R_cnt
|
||||
brne 3b
|
||||
;; __zero_reg__ is 0 again
|
||||
|
||||
;; T = 0: We are fine with the Quotient in A[]
|
||||
;; T = 1: Copy Remainder to A[]
|
||||
5: brtc 6f
|
||||
wmov A0, C0
|
||||
wmov A2, C2
|
||||
wmov A4, C4
|
||||
wmov A6, C6
|
||||
;; Move the Sign of the Result to SS.7
|
||||
lsl SS
|
||||
|
||||
6: ret
|
||||
|
||||
ENDF __udivmod64
|
||||
#endif /* L_udivmod64 */
|
||||
|
||||
|
||||
#if defined (L_divdi3)
|
||||
|
||||
;; R25:R18 = R24:R18 mod R17:R10
|
||||
;; Ordinary ABI-Function
|
||||
|
||||
DEFUN __moddi3
|
||||
set
|
||||
rjmp __divdi3_moddi3
|
||||
ENDF __moddi3
|
||||
|
||||
;; R25:R18 = R24:R18 div R17:R10
|
||||
;; Ordinary ABI-Function
|
||||
|
||||
DEFUN __divdi3
|
||||
clt
|
||||
ENDF __divdi3
|
||||
|
||||
DEFUN __divdi3_moddi3
|
||||
#if SPEED_DIV
|
||||
mov r31, A7
|
||||
or r31, B7
|
||||
brmi 0f
|
||||
;; Both Signs are 0: the following Complexitiy is not needed
|
||||
XJMP __udivdi3_umoddi3
|
||||
#endif /* SPEED_DIV */
|
||||
|
||||
0: ;; The Prologue
|
||||
;; Save Z = 12 Registers: Y, 17...8
|
||||
;; No Frame needed (X = 0)
|
||||
clr r26
|
||||
clr r27
|
||||
ldi r30, lo8(gs(1f))
|
||||
ldi r31, hi8(gs(1f))
|
||||
XJMP __prologue_saves__ + ((18 - 12) * 2)
|
||||
|
||||
1: ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
|
||||
;; SS.6 will contain the Sign of the Remainder (A.sign)
|
||||
mov SS, A7
|
||||
asr SS
|
||||
;; Adjust Dividend's Sign as needed
|
||||
#if SPEED_DIV
|
||||
;; Compiling for Speed we know that at least one Sign must be < 0
|
||||
;; Thus, if A[] >= 0 then we know B[] < 0
|
||||
brpl 22f
|
||||
#else
|
||||
brpl 21f
|
||||
#endif /* SPEED_DIV */
|
||||
|
||||
XCALL __negdi2
|
||||
|
||||
;; Adjust Divisor's Sign and SS.7 as needed
|
||||
21: tst B7
|
||||
brpl 3f
|
||||
22: ldi NN, 1 << 7
|
||||
eor SS, NN
|
||||
|
||||
ldi NN, -1
|
||||
com B4 $ com B5 $ com B6 $ com B7
|
||||
$ com B1 $ com B2 $ com B3
|
||||
NEG B0
|
||||
$ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
|
||||
sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
|
||||
|
||||
3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
|
||||
XCALL __udivmod64
|
||||
|
||||
;; Adjust Result's Sign
|
||||
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
|
||||
tst SS
|
||||
brpl 4f
|
||||
#else
|
||||
sbrc SS, 7
|
||||
#endif /* __AVR_HAVE_JMP_CALL__ */
|
||||
XCALL __negdi2
|
||||
|
||||
4: ;; Epilogue: Restore the Z = 12 Registers and return
|
||||
in r28, __SP_L__
|
||||
in r29, __SP_H__
|
||||
ldi r30, 12
|
||||
XJMP __epilogue_restores__ + ((18 - 12) * 2)
|
||||
|
||||
ENDF __divdi3_moddi3
|
||||
|
||||
#undef R_cnt
|
||||
#undef SS
|
||||
#undef NN
|
||||
|
||||
#endif /* L_divdi3 */
|
||||
|
||||
#if defined (L_negdi2)
|
||||
DEFUN __negdi2
|
||||
|
||||
com A4 $ com A5 $ com A6 $ com A7
|
||||
$ com A1 $ com A2 $ com A3
|
||||
NEG A0
|
||||
$ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
|
||||
sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
|
||||
ret
|
||||
|
||||
ENDF __negdi2
|
||||
#endif /* L_negdi2 */
|
||||
|
||||
#undef C7
|
||||
#undef C6
|
||||
#undef C5
|
||||
#undef C4
|
||||
#undef C3
|
||||
#undef C2
|
||||
#undef C1
|
||||
#undef C0
|
||||
|
||||
#undef B7
|
||||
#undef B6
|
||||
#undef B5
|
||||
#undef B4
|
||||
#undef B3
|
||||
#undef B2
|
||||
#undef B1
|
||||
#undef B0
|
||||
|
||||
#undef A7
|
||||
#undef A6
|
||||
#undef A5
|
||||
#undef A4
|
||||
#undef A3
|
||||
#undef A2
|
||||
#undef A1
|
||||
#undef A0
|
||||
|
||||
|
||||
.section .text.libgcc.prologue, "ax", @progbits
|
||||
|
||||
@ -854,6 +1209,7 @@ ENDF __divmodsi4
|
||||
**********************************/
|
||||
#if defined (L_prologue)
|
||||
|
||||
;; This function does not clobber T-flag; 64-bit division relies on it
|
||||
DEFUN __prologue_saves__
|
||||
push r2
|
||||
push r3
|
||||
|
@ -15,6 +15,9 @@ LIB1ASMFUNCS = \
|
||||
_divmodpsi4 _udivmodpsi4 \
|
||||
_udivmodsi4 \
|
||||
_divmodsi4 \
|
||||
_divdi3 _udivdi3 \
|
||||
_udivmod64 \
|
||||
_negdi2 \
|
||||
_prologue \
|
||||
_epilogue \
|
||||
_exit \
|
||||
@ -50,6 +53,7 @@ LIB1ASMFUNCS = \
|
||||
_fmul _fmuls _fmulsu
|
||||
|
||||
LIB2FUNCS_EXCLUDE = \
|
||||
_moddi3 _umoddi3 \
|
||||
_clz
|
||||
|
||||
# We do not have the DF type.
|
||||
|
Loading…
x
Reference in New Issue
Block a user