Simplify power6 wordcopy by adding [fwd|bwd]_align_merge macros.

This commit is contained in:
Will Schmidt 2012-04-18 14:52:25 -05:00 committed by Ryan S. Arnold
parent 95aa737cf9
commit b282631e36
3 changed files with 87 additions and 272 deletions

View File

@ -1,3 +1,11 @@
2012-04-18 Will Schmidt <will_schmidt@vnet.ibm.com>
* sysdeps/powerpc/powerpc64/power6/wordcopy.c: Add fwd_align_merge and
bwd_align_merge macros.
(_wordcopy_fwd_dest_aligned): Use fwd_align_merge macro calls.
(_wordcopy_bwd_dest_aligned): Use bwd_align_merge macro calls.
* sysdeps/powerpc/powerpc32/power6/wordcopy.c: Likewise.
2012-04-18 David S. Miller <davem@davemloft.net>
* sysdeps/sparc/sparc64/memcopy.h: Delete.

View File

@ -1,5 +1,5 @@
/* _memcopy.c -- subroutines for memory copy functions.
Copyright (C) 1991, 1996, 2006 Free Software Foundation, Inc.
Copyright (C) 1991-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Torbjorn Granlund (tege@sics.se).
Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com).
@ -65,6 +65,20 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
DSTP should be aligned for memory operations on `op_t's, but SRCP must
*not* be aligned. */
/* fwd_align_merge (ALIGN) -- forward merge-and-store loop body shared by
   the `switch (align)' cases of _wordcopy_fwd_dest_aligned.

   This is a statement macro, not a function: it reads and modifies the
   variables A0, A1, A2, SRCP, DSTP and LEN of the scope it is expanded
   in, and relies on MERGE, OPSIZ and op_t being defined there.

   Each iteration loads the two words at SRCP[1] and SRCP[2], stores two
   merged words at DSTP[0] and DSTP[1], carries the last loaded word over
   in A0, advances SRCP and DSTP by two words and decrements LEN by two.
   The loop stops only when LEN reaches exactly zero, so LEN is expected
   to be a non-zero multiple of two on entry.

   ALIGN * 8 is passed to MERGE as the first shift count and
   32 - ALIGN * 8 as the second.  ALIGN is parenthesized in the expansion
   so that an expression argument (e.g. `n + 1') still yields the intended
   shift counts.  No trailing semicolon is supplied; the caller writes
   `fwd_align_merge (n);'.  */
#define fwd_align_merge(align) \
  do									\
    {									\
      a1 = ((op_t *) srcp)[1];						\
      a2 = ((op_t *) srcp)[2];						\
      ((op_t *) dstp)[0] = MERGE (a0, (align) * 8, a1, (32 - (align) * 8)); \
      ((op_t *) dstp)[1] = MERGE (a1, (align) * 8, a2, (32 - (align) * 8)); \
      a0 = a2;								\
      srcp += 2 * OPSIZ;						\
      dstp += 2 * OPSIZ;						\
      len -= 2;								\
    }									\
  while (len != 0)
void
_wordcopy_fwd_dest_aligned (dstp, srcp, len)
long int dstp;
@ -104,49 +118,13 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
do
{
a1 = ((op_t *) srcp)[1];
a2 = ((op_t *) srcp)[2];
((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
a0 = a2;
srcp += 2 * OPSIZ;
dstp += 2 * OPSIZ;
len -= 2;
}
while (len != 0);
fwd_align_merge (1);
break;
case 2:
do
{
a1 = ((op_t *) srcp)[1];
a2 = ((op_t *) srcp)[2];
((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
a0 = a2;
srcp += 2 * OPSIZ;
dstp += 2 * OPSIZ;
len -= 2;
}
while (len != 0);
fwd_align_merge (2);
break;
case 3:
do
{
a1 = ((op_t *) srcp)[1];
a2 = ((op_t *) srcp)[2];
((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
a0 = a2;
srcp += 2 * OPSIZ;
dstp += 2 * OPSIZ;
len -= 2;
}
while (len != 0);
fwd_align_merge (3);
break;
}
@ -191,6 +169,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
while (len != 0);
}
/* bwd_align_merge (ALIGN) -- backward merge-and-store loop body shared by
   the `switch (align)' cases of _wordcopy_bwd_dest_aligned.

   This is a statement macro, not a function: it reads and modifies the
   variables A0, A1, A2, SRCP, DSTP and LEN of the scope it is expanded
   in, and relies on MERGE, OPSIZ and op_t being defined there.

   Each iteration first moves SRCP and DSTP back by two words, loads the
   words at SRCP[1] and SRCP[0], stores two merged words at DSTP[1] and
   DSTP[0], carries the lowest loaded word over in A2 and decrements LEN
   by two.  The loop stops only when LEN reaches exactly zero, so LEN is
   expected to be a non-zero multiple of two on entry.

   ALIGN * 8 is passed to MERGE as the first shift count and
   32 - ALIGN * 8 as the second.  ALIGN is parenthesized in the expansion
   so that an expression argument (e.g. `n + 1') still yields the intended
   shift counts.  No trailing semicolon is supplied; the caller writes
   `bwd_align_merge (n);'.  */
#define bwd_align_merge(align) \
  do									\
    {									\
      srcp -= 2 * OPSIZ;						\
      dstp -= 2 * OPSIZ;						\
      a1 = ((op_t *) srcp)[1];						\
      a0 = ((op_t *) srcp)[0];						\
      ((op_t *) dstp)[1] = MERGE (a1, (align) * 8, a2, (32 - (align) * 8)); \
      ((op_t *) dstp)[0] = MERGE (a0, (align) * 8, a1, (32 - (align) * 8)); \
      a2 = a0;								\
      len -= 2;								\
    }									\
  while (len != 0)
/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
before SRCP to block finishing right before DSTP with LEN `op_t'
words (not LEN bytes!). DSTP should be aligned for memory
@ -235,52 +227,13 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
do
{
srcp -= 2 * OPSIZ;
dstp -= 2 * OPSIZ;
a1 = ((op_t *) srcp)[1];
a0 = ((op_t *) srcp)[0];
((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
a2 = a0;
len -= 2;
}
while (len != 0);
bwd_align_merge (1);
break;
case 2:
do
{
srcp -= 2 * OPSIZ;
dstp -= 2 * OPSIZ;
a1 = ((op_t *) srcp)[1];
a0 = ((op_t *) srcp)[0];
((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
a2 = a0;
len -= 2;
}
while (len != 0);
bwd_align_merge (2);
break;
case 3:
do
{
srcp -= 2 * OPSIZ;
dstp -= 2 * OPSIZ;
a1 = ((op_t *) srcp)[1];
a0 = ((op_t *) srcp)[0];
((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
a2 = a0;
len -= 2;
}
while (len != 0);
bwd_align_merge (3);
break;
}
}

View File

@ -1,5 +1,5 @@
/* _memcopy.c -- subroutines for memory copy functions.
Copyright (C) 1991, 1996 Free Software Foundation, Inc.
Copyright (C) 1991-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Torbjorn Granlund (tege@sics.se).
@ -59,6 +59,21 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
while (len != 0);
}
/* fwd_align_merge (ALIGN) -- forward merge-and-store loop body shared by
   the `switch (align)' cases of _wordcopy_fwd_dest_aligned.

   This is a statement macro, not a function: it reads and modifies the
   variables A0, A1, A2, SRCP, DSTP and LEN of the scope it is expanded
   in, and relies on MERGE, OPSIZ and op_t being defined there.

   Each iteration loads the two words at SRCP[1] and SRCP[2], stores two
   merged words at DSTP[0] and DSTP[1], carries the last loaded word over
   in A0, advances SRCP and DSTP by two words and decrements LEN by two.
   The loop stops only when LEN reaches exactly zero, so LEN is expected
   to be a non-zero multiple of two on entry.

   ALIGN * 8 is passed to MERGE as the first shift count and
   64 - ALIGN * 8 as the second.  ALIGN is parenthesized in the expansion
   so that an expression argument (e.g. `n + 1') still yields the intended
   shift counts.  No trailing semicolon is supplied; the caller writes
   `fwd_align_merge (n);'.  */
#define fwd_align_merge(align) \
  do									\
    {									\
      a1 = ((op_t *) srcp)[1];						\
      a2 = ((op_t *) srcp)[2];						\
      ((op_t *) dstp)[0] = MERGE (a0, (align) * 8, a1, (64 - (align) * 8)); \
      ((op_t *) dstp)[1] = MERGE (a1, (align) * 8, a2, (64 - (align) * 8)); \
      a0 = a2;								\
      srcp += 2 * OPSIZ;						\
      dstp += 2 * OPSIZ;						\
      len -= 2;								\
    }									\
  while (len != 0)
/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
DSTP should be aligned for memory operations on `op_t's, but SRCP must
@ -103,109 +118,25 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
do
{
a1 = ((op_t *) srcp)[1];
a2 = ((op_t *) srcp)[2];
((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
a0 = a2;
srcp += 2 * OPSIZ;
dstp += 2 * OPSIZ;
len -= 2;
}
while (len != 0);
fwd_align_merge (1);
break;
case 2:
do
{
a1 = ((op_t *) srcp)[1];
a2 = ((op_t *) srcp)[2];
((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
a0 = a2;
srcp += 2 * OPSIZ;
dstp += 2 * OPSIZ;
len -= 2;
}
while (len != 0);
fwd_align_merge (2);
break;
case 3:
do
{
a1 = ((op_t *) srcp)[1];
a2 = ((op_t *) srcp)[2];
((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
a0 = a2;
srcp += 2 * OPSIZ;
dstp += 2 * OPSIZ;
len -= 2;
}
while (len != 0);
fwd_align_merge (3);
break;
case 4:
do
{
a1 = ((op_t *) srcp)[1];
a2 = ((op_t *) srcp)[2];
((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
a0 = a2;
srcp += 2 * OPSIZ;
dstp += 2 * OPSIZ;
len -= 2;
}
while (len != 0);
fwd_align_merge (4);
break;
case 5:
do
{
a1 = ((op_t *) srcp)[1];
a2 = ((op_t *) srcp)[2];
((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
a0 = a2;
srcp += 2 * OPSIZ;
dstp += 2 * OPSIZ;
len -= 2;
}
while (len != 0);
fwd_align_merge (5);
break;
case 6:
do
{
a1 = ((op_t *) srcp)[1];
a2 = ((op_t *) srcp)[2];
((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
a0 = a2;
srcp += 2 * OPSIZ;
dstp += 2 * OPSIZ;
len -= 2;
}
while (len != 0);
fwd_align_merge (6);
break;
case 7:
do
{
a1 = ((op_t *) srcp)[1];
a2 = ((op_t *) srcp)[2];
((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
a0 = a2;
srcp += 2 * OPSIZ;
dstp += 2 * OPSIZ;
len -= 2;
}
while (len != 0);
fwd_align_merge (7);
break;
}
@ -250,6 +181,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
while (len != 0);
}
/* bwd_align_merge (ALIGN) -- backward merge-and-store loop body shared by
   the `switch (align)' cases of _wordcopy_bwd_dest_aligned.

   This is a statement macro, not a function: it reads and modifies the
   variables A0, A1, A2, SRCP, DSTP and LEN of the scope it is expanded
   in, and relies on MERGE, OPSIZ and op_t being defined there.

   Each iteration first moves SRCP and DSTP back by two words, loads the
   words at SRCP[1] and SRCP[0], stores two merged words at DSTP[1] and
   DSTP[0], carries the lowest loaded word over in A2 and decrements LEN
   by two.  The loop stops only when LEN reaches exactly zero, so LEN is
   expected to be a non-zero multiple of two on entry.

   ALIGN * 8 is passed to MERGE as the first shift count and
   64 - ALIGN * 8 as the second.  ALIGN is parenthesized in the expansion
   so that an expression argument (e.g. `n + 1') still yields the intended
   shift counts.  No trailing semicolon is supplied; the caller writes
   `bwd_align_merge (n);'.  */
#define bwd_align_merge(align) \
  do									\
    {									\
      srcp -= 2 * OPSIZ;						\
      dstp -= 2 * OPSIZ;						\
      a1 = ((op_t *) srcp)[1];						\
      a0 = ((op_t *) srcp)[0];						\
      ((op_t *) dstp)[1] = MERGE (a1, (align) * 8, a2, (64 - (align) * 8)); \
      ((op_t *) dstp)[0] = MERGE (a0, (align) * 8, a1, (64 - (align) * 8)); \
      a2 = a0;								\
      len -= 2;								\
    }									\
  while (len != 0)
/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
before SRCP to block finishing right before DSTP with LEN `op_t'
words (not LEN bytes!). DSTP should be aligned for memory
@ -294,116 +239,25 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
do
{
srcp -= 2 * OPSIZ;
dstp -= 2 * OPSIZ;
a1 = ((op_t *) srcp)[1];
a0 = ((op_t *) srcp)[0];
((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
a2 = a0;
len -= 2;
}
while (len != 0);
bwd_align_merge (1);
break;
case 2:
do
{
srcp -= 2 * OPSIZ;
dstp -= 2 * OPSIZ;
a1 = ((op_t *) srcp)[1];
a0 = ((op_t *) srcp)[0];
((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
a2 = a0;
len -= 2;
}
while (len != 0);
bwd_align_merge (2);
break;
case 3:
do
{
srcp -= 2 * OPSIZ;
dstp -= 2 * OPSIZ;
a1 = ((op_t *) srcp)[1];
a0 = ((op_t *) srcp)[0];
((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
a2 = a0;
len -= 2;
}
while (len != 0);
bwd_align_merge (3);
break;
case 4:
do
{
srcp -= 2 * OPSIZ;
dstp -= 2 * OPSIZ;
a1 = ((op_t *) srcp)[1];
a0 = ((op_t *) srcp)[0];
((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
a2 = a0;
len -= 2;
}
while (len != 0);
bwd_align_merge (4);
break;
case 5:
do
{
srcp -= 2 * OPSIZ;
dstp -= 2 * OPSIZ;
a1 = ((op_t *) srcp)[1];
a0 = ((op_t *) srcp)[0];
((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
a2 = a0;
len -= 2;
}
while (len != 0);
bwd_align_merge (5);
break;
case 6:
do
{
srcp -= 2 * OPSIZ;
dstp -= 2 * OPSIZ;
a1 = ((op_t *) srcp)[1];
a0 = ((op_t *) srcp)[0];
((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
a2 = a0;
len -= 2;
}
while (len != 0);
bwd_align_merge (6);
break;
case 7:
do
{
srcp -= 2 * OPSIZ;
dstp -= 2 * OPSIZ;
a1 = ((op_t *) srcp)[1];
a0 = ((op_t *) srcp)[0];
((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
a2 = a0;
len -= 2;
}
while (len != 0);
bwd_align_merge (7);
break;
}
}