1995-02-18 09:27:10 +08:00
|
|
|
! sparc __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
|
|
|
! sum in a third limb vector.
|
|
|
|
|
1995-12-05 02:37:56 +08:00
|
|
|
! Copyright (C) 1995 Free Software Foundation, Inc.
|
1995-02-18 09:27:10 +08:00
|
|
|
|
|
|
|
! This file is part of the GNU MP Library.
|
|
|
|
|
|
|
|
! The GNU MP Library is free software; you can redistribute it and/or modify
|
|
|
|
! it under the terms of the GNU Library General Public License as published by
|
|
|
|
! the Free Software Foundation; either version 2 of the License, or (at your
|
|
|
|
! option) any later version.
|
|
|
|
|
|
|
|
! The GNU MP Library is distributed in the hope that it will be useful, but
|
|
|
|
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
|
|
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
|
|
|
! License for more details.
|
|
|
|
|
|
|
|
! You should have received a copy of the GNU Library General Public License
|
|
|
|
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
|
|
|
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
|
|
|
|
|
|
|
|
! INPUT PARAMETERS
|
1995-12-05 02:37:56 +08:00
|
|
|
/* Register aliases for the four arguments of the routine below.  */
/* res_ptr: address the sum limbs are stored to.  The same register (%o0)
   is overwritten with the carry-out just before the routine returns.  */
#define res_ptr %o0
|
|
|
|
/* s1_ptr: address the first addend's limbs are loaded from.  */
#define s1_ptr %o1
|
|
|
|
/* s2_ptr: address the second addend's limbs are loaded from.  */
#define s2_ptr %o2
|
|
|
|
/* size: number of limbs in each vector; the code below steps every pointer
   by 4 bytes and decrements size by 1 for each limb it handles.  */
#define size %o3
|
1995-02-18 09:27:10 +08:00
|
|
|
|
|
|
|
/* NOTE(review): C_SYMBOL_NAME is not defined in this file; presumably it
   comes from sysdep.h -- confirm.  */
#include "sysdep.h"
|
|
|
|
|
|
|
|
/* Place the routine in the text (code) section.  */
.text
|
|
|
|
/* Align the entry point.  */
.align 4
|
|
|
|
/* Export the entry symbol so other object files can reference it.  */
.global C_SYMBOL_NAME(__mpn_add_n)
|
|
|
|
/*-----------------------------------------------------------------------
   __mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)

   Add the size-limb vectors at s1_ptr and s2_ptr, store the size-limb sum
   at res_ptr and return the carry-out (0 or 1) of the most significant
   limb in %o0.  A limb is one 32-bit word.

   In:     %o0 = res_ptr, %o1 = s1_ptr, %o2 = s2_ptr, %o3 = size (> 0)
   Out:    %o0 = carry-out
   Clobb:  %g1-%g4, %o1-%o5, integer condition codes
   Leaf routine: no register window is allocated, return is via retl.

   Strategy
     size < 8   Plain one-limb-at-a-time loop (Lsmall).  It uses only
                word loads/stores, so it is safe for any word-aligned
                operands.
     V1a / V1b  res_ptr and one source agree in bit 2 of their address.
                After peeling at most one limb both are doubleword
                aligned; that source is read with ldd and the result is
                written with std.  The other source is read with ld.
     V2         res_ptr disagrees with both sources, hence the two sources
                agree with each other.  After peeling at most one limb
                both sources are read with ldd; the result is written
                with st.

   Carry handling: the loop counters are updated with addcc, which
   destroys the carry flag.  The carry is therefore parked in %o4 with
   "addx %g0,%g0,%o4" and re-created with "subcc %g0,%o4,%g0" (0 - 1
   borrows, which sets the carry flag), usually in a branch delay slot.

   Fix relative to the previous revision: short vectors used to jump
   straight into Loop2b (label Ltriv), which executes ldd on s1_ptr and
   s2_ptr without any alignment check.  ldd needs a doubleword-aligned
   address, so short operands that were only word aligned trapped.
  -----------------------------------------------------------------------*/
C_SYMBOL_NAME(__mpn_add_n):
	cmp	size,8
	blt	Lsmall			! short vector: no ldd/std allowed
	mov	0,%o4			! (delay slot) clear cy-save register
	xor	s2_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L1			! branch if alignment differs
	nop
L0:	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
	beq	L_v1			! if no, branch
	nop
! **  V1a  **
/* Add least significant limb separately to align res_ptr and s2_ptr */
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	size,-1,size
	addcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

/* Preload the first two limbs of each source; the loops below always run
   one limb pair ahead of the additions.  */
L_v1:	ld	[s1_ptr+0],%g4
	ld	[s1_ptr+4],%g1
	ldd	[s2_ptr+0],%g2
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-10,size		! 8 for the block + 2 preloaded limbs
	blt	Lfin1
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
Loop1:	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+16],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+20],%g1
	ldd	[s2_ptr+16],%g2
	std	%o4,[res_ptr+8]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+24],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+28],%g1
	ldd	[s2_ptr+24],%g2
	std	%o4,[res_ptr+16]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+32],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+36],%g1
	ldd	[s2_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	Loop1
	subcc	%g0,%o4,%g0		! restore cy

Lfin1:	addcc	size,8-2,size
	blt	Lend1
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loop1b:	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-2,size
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	Loop1b
	subcc	%g0,%o4,%g0		! restore cy
/* Add the limb pair that is still sitting preloaded in registers */
Lend1:	addxcc	%g4,%g2,%o4
	addxcc	%g1,%g3,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	size,1,%g0		! odd number of limbs left?
	be	Lret1
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
	ld	[s1_ptr+8],%g4
	ld	[s2_ptr+8],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr+8]

Lret1:	retl
	addx	%g0,%g0,%o0	! return carry-out from most sign. limb

L1:	xor	s1_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L2
	nop
! **  V1b  **
/* s1_ptr (not s2_ptr) shares its alignment with res_ptr.  Addition is
   commutative, so swap the two source pointers and reuse the V1a code.  */
	mov	s2_ptr,%g1
	mov	s1_ptr,s2_ptr
	b	L0
	mov	%g1,s1_ptr

! **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of s1_ptr and s2_ptr are the same.  */

L2:	andcc	s1_ptr,4,%g0		! s1_ptr unaligned? Side effect: cy=0
	beq	L_v2			! if no, branch
	nop
/* Add least significant limb separately to align s1_ptr and s2_ptr */
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	size,-1,size
	addcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

L_v2:	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	blt	Lfin2
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
Loop2:	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	ldd	[s1_ptr+8],%g2
	ldd	[s2_ptr+8],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+8]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+12]
	ldd	[s1_ptr+16],%g2
	ldd	[s2_ptr+16],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+16]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+20]
	ldd	[s1_ptr+24],%g2
	ldd	[s2_ptr+24],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+24]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	Loop2
	subcc	%g0,%o4,%g0		! restore cy

Lfin2:	addcc	size,8-2,size
	blt	Lend2
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loop2b:	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-2,size
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	Loop2b
	subcc	%g0,%o4,%g0		! restore cy
Lend2:	andcc	size,1,%g0		! odd number of limbs left?
	be	Lret2
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
	ld	[s1_ptr],%g4
	ld	[s2_ptr],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr]

Lret2:	retl
	addx	%g0,%g0,%o0	! return carry-out from most sign. limb

/* Short vectors (size < 8): add one limb per iteration using word accesses
   only, so no operand needs more than word alignment.
   On entry %o4 = 0 (set in the delay slot of the branch that comes here).
   Loop invariant at Lsmall: %o4 holds the carry into the next limb.
   If size <= 0 on entry nothing is stored and 0 is returned.  */
Lsmall:	addcc	size,-1,size		! one limb fewer to do; negative => done
	blt	Lret2
	subcc	%g0,%o4,%g0		! (delay slot) restore cy
	ld	[s1_ptr],%g4
	ld	[s2_ptr],%g2
	add	s1_ptr,4,s1_ptr
	add	s2_ptr,4,s2_ptr
	addxcc	%g4,%g2,%g4
	st	%g4,[res_ptr]
	add	res_ptr,4,res_ptr
	b	Lsmall
	addx	%g0,%g0,%o4		! (delay slot) save cy in register
|