[svn-r10576] Purpose:

Code optimization

Description:
    Use Duff's Device to unroll shuffling loop a bit, for ~30% speedup.

Platforms tested:
    FreeBSD 4.11 (sleipnir)
    Solaris 2.9 (shanti)
This commit is contained in:
Quincey Koziol 2005-04-07 19:44:13 -05:00
parent 227c58bd98
commit b6c87bcdac

View File

@ -158,10 +158,45 @@ H5Z_filter_shuffle(unsigned flags, size_t cd_nelmts, const unsigned cd_values[],
/* Input; unshuffle */
for(i=0; i<bytesoftype; i++) {
_dest=((unsigned char *)dest)+i;
for(j=0; j<numofelements; j++) {
*_dest=*_src++;
_dest+=bytesoftype;
#define DUFF_GUTS \
*_dest=*_src++; \
_dest+=bytesoftype;
#ifdef NO_DUFFS_DEVICE
j = numofelements;
while(j > 0) {
DUFF_GUTS;
j--;
} /* end while */
#else /* NO_DUFFS_DEVICE */
{
size_t duffs_index; /* Counting index for Duff's device */
duffs_index = (numofelements + 7) / 8;
switch (numofelements % 8) {
case 0:
do
{
DUFF_GUTS
case 7:
DUFF_GUTS
case 6:
DUFF_GUTS
case 5:
DUFF_GUTS
case 4:
DUFF_GUTS
case 3:
DUFF_GUTS
case 2:
DUFF_GUTS
case 1:
DUFF_GUTS
} while (--duffs_index > 0);
} /* end switch */
}
#endif /* NO_DUFFS_DEVICE */
#undef DUFF_GUTS
} /* end for */
/* Add leftover to the end of data */
@ -178,10 +213,45 @@ H5Z_filter_shuffle(unsigned flags, size_t cd_nelmts, const unsigned cd_values[],
/* Output; shuffle */
for(i=0; i<bytesoftype; i++) {
_src=((unsigned char *)(*buf))+i;
for(j=0; j<numofelements; j++) {
*_dest++=*_src;
_src+=bytesoftype;
#define DUFF_GUTS \
*_dest++=*_src; \
_src+=bytesoftype;
#ifdef NO_DUFFS_DEVICE
j = numofelements;
while(j > 0) {
DUFF_GUTS;
j--;
} /* end while */
#else /* NO_DUFFS_DEVICE */
{
size_t duffs_index; /* Counting index for Duff's device */
duffs_index = (numofelements + 7) / 8;
switch (numofelements % 8) {
case 0:
do
{
DUFF_GUTS
case 7:
DUFF_GUTS
case 6:
DUFF_GUTS
case 5:
DUFF_GUTS
case 4:
DUFF_GUTS
case 3:
DUFF_GUTS
case 2:
DUFF_GUTS
case 1:
DUFF_GUTS
} while (--duffs_index > 0);
} /* end switch */
}
#endif /* NO_DUFFS_DEVICE */
#undef DUFF_GUTS
} /* end for */
/* Add leftover to the end of data */