glibc/benchtests/bench-strcpy.c
Rajalakshmi Srinivasaraghavan f59ad976ed powerpc: POWER7 strcpy optimization for unaligned strings
This patch optimizes strcpy for ppc64/power7 for unaligned source or
destination address.  The source or destination address is aligned
to doubleword and data is shifted based on the alignment and
added with the previous loaded data to be written as a doubleword.
For each load, cmpb instruction is used for faster null check.

The word aligned optimization is also removed, since the new unaligned
code path shows better results handling word-aligned strings.

More combinations of unaligned inputs are also added to the benchtest
to measure the improvement.  The new optimization shows a 2 to 80%
performance improvement for longer strings, though it does not show a
big difference for string sizes less than 16 due to additional checks.
2014-12-31 14:35:59 -05:00

194 lines
4.6 KiB
C

/* Measure strcpy functions.
Copyright (C) 2013-2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Character-type configuration.  When WIDE is defined the benchmark is
   built for wchar_t strings; otherwise it is built for plain char
   strings.  The macros below let the rest of the file be written once
   for both cases.  */
#ifdef WIDE
# include <wchar.h>
/* Element type of the strings being copied.  */
# define CHAR wchar_t
# define UCHAR wchar_t
/* printf conversion suffix used after '%' to print a CHAR string.  */
# define sfmt "ls"
/* Upper bounds passed as max_char when generating the source string.  */
# define BIG_CHAR WCHAR_MAX
# define SMALL_CHAR 1273
/* Wide-character versions of the comparison/fill helpers.  */
# define STRCMP wcscmp
# define MEMCMP wmemcmp
# define MEMSET wmemset
#else
/* Element type of the strings being copied.  */
# define CHAR char
# define UCHAR unsigned char
/* printf conversion suffix used after '%' to print a CHAR string.  */
# define sfmt "s"
/* Upper bounds passed as max_char when generating the source string.  */
# define BIG_CHAR CHAR_MAX
# define SMALL_CHAR 127
/* Byte-string versions of the comparison/fill helpers.  */
# define STRCMP strcmp
# define MEMCMP memcmp
# define MEMSET memset
#endif
/* Stand-alone configuration, used only when STRCPY_RESULT has not been
   defined before this point (presumably by another benchmark source
   that includes this file -- confirm against the other benchtests).
   It selects the function under test, names the benchmark, and provides
   a simple reference implementation to compare against.  */
#ifndef STRCPY_RESULT
/* Value the function under test is expected to return: the destination
   pointer itself.  */
# define STRCPY_RESULT(dst, len) dst
# define TEST_MAIN
# ifndef WIDE
#  define TEST_NAME "strcpy"
# else
#  define TEST_NAME "wcscpy"
# endif
# include "bench-string.h"
# ifndef WIDE
#  define SIMPLE_STRCPY simple_strcpy
#  define STRCPY strcpy
# else
#  define SIMPLE_STRCPY simple_wcscpy
#  define STRCPY wcscpy
# endif

CHAR *SIMPLE_STRCPY (CHAR *, const CHAR *);

/* Register the reference implementation and the library function.  */
IMPL (SIMPLE_STRCPY, 0)
IMPL (STRCPY, 1)

/* Reference copy: transfer elements from SRC to DST one at a time, up to
   and including the terminating null element, and return DST.  */
CHAR *
SIMPLE_STRCPY (CHAR *dst, const CHAR *src)
{
  CHAR *out = dst;
  CHAR c;

  do
    {
      c = *src++;
      *out++ = c;
    }
  while (c != '\0');

  return dst;
}
#endif
/* Pointer-to-function type with the strcpy/wcscpy signature shared by
   the implementations registered with IMPL.  Presumably this is the type
   the CALL macro uses to invoke an implementation -- CALL is defined
   outside this file, so confirm there.  */
typedef CHAR *(*proto_t) (CHAR *, const CHAR *);
/* Verify and time one implementation on one input.

   IMPL is the implementation to run, DST the destination buffer and SRC
   the null-terminated source string.  LEN is the source length in CHAR
   elements; it is only consumed by STRCPY_RESULT, whose default
   definition ignores it, hence the unused attribute.

   The implementation is first called once and checked: its return value
   must equal STRCPY_RESULT (dst, len) and DST must compare equal to SRC.
   On a mismatch an error is reported, the global RET is set to 1 and no
   timing is done.  Otherwise INNER_LOOP_ITERS calls are timed and the
   mean is reported through TIMING_PRINT_MEAN.  */
static void
do_one_test (impl_t *impl, CHAR *dst, const CHAR *src,
             size_t len __attribute__((unused)))
{
  size_t i, iters = INNER_LOOP_ITERS;
  timing_t start, stop, cur;

  /* Call the implementation exactly once for the check and keep its
     result, so that the diagnostic below prints the value that actually
     failed the comparison instead of the result of a second call.  */
  CHAR *res = CALL (impl, dst, src);
  CHAR *expected = STRCPY_RESULT (dst, len);

  if (res != expected)
    {
      /* %p requires arguments of type void *.  */
      error (0, 0, "Wrong result in function %s %p %p", impl->name,
             (void *) res, (void *) expected);
      ret = 1;
      return;
    }

  if (STRCMP (dst, src) != 0)
    {
      error (0, 0,
             "Wrong result in function %s dst \"%" sfmt "\" src \"%" sfmt "\"",
             impl->name, dst, src);
      ret = 1;
      return;
    }

  TIMING_NOW (start);
  for (i = 0; i < iters; ++i)
    {
      CALL (impl, dst, src);
    }
  TIMING_NOW (stop);

  TIMING_DIFF (cur, start, stop);

  TIMING_PRINT_MEAN ((double) cur, (double) iters);
}
/* Build one source string and run every registered implementation on it.

   ALIGN1 is the offset of the source string inside buf1 and ALIGN2 the
   offset of the destination inside buf2; both are reduced modulo 8.
   LEN is the string length, not counting the terminator.  MAX_CHAR
   bounds the generated element values: each element is
   32 + 23 * i % (max_char - 32).

   The case is silently skipped when either (align + len) * sizeof (CHAR)
   is not smaller than page_size.  */
static void
do_test (size_t align1, size_t align2, size_t len, int max_char)
{
  size_t i;
  CHAR *s1, *s2;

  /* For wcscpy: align1 and align2 here mean alignment not in bytes,
     but in wchar_ts; in bytes it is equal to align * sizeof (wchar_t).
     Likewise len for wcscpy isn't in bytes, it is the number of wchar_t
     symbols.  */
  align1 &= 7;
  if ((align1 + len) * sizeof (CHAR) >= page_size)
    return;

  align2 &= 7;
  if ((align2 + len) * sizeof (CHAR) >= page_size)
    return;

  s1 = (CHAR *) (buf1) + align1;
  s2 = (CHAR *) (buf2) + align2;

  for (i = 0; i < len; i++)
    s1[i] = 32 + 23 * i % (max_char - 32);
  s1[len] = 0;

  /* All three values are size_t, so the matching conversion is %zu
     (%zd is for the corresponding signed type).  */
  printf ("Length %4zu, alignments in bytes %2zu/%2zu:", len,
          align1 * sizeof (CHAR), align2 * sizeof (CHAR));

  FOR_EACH_IMPL (impl, 0)
    do_one_test (impl, s2, s1, len);

  putchar ('\n');
}
/* Benchmark driver: print a header row naming every registered
   implementation, then run do_test over a fixed series of
   alignment/length/character-range combinations.  Returns the global
   RET, which do_one_test sets to 1 when a check fails.  */
int
test_main (void)
{
  /* Alignment pairs and character ranges exercised for every length of
     the final 16..512 sweep, in the order they are run.  */
  static const struct
  {
    size_t align1;
    size_t align2;
    int max_char;
  } sweep[] =
    {
      { 0, 4, SMALL_CHAR },
      { 4, 0, BIG_CHAR },
      { 4, 4, SMALL_CHAR },
      { 2, 2, BIG_CHAR },
      { 2, 6, SMALL_CHAR },
      { 6, 2, BIG_CHAR },
      { 1, 7, SMALL_CHAR },
      { 7, 1, BIG_CHAR },
      { 3, 4, SMALL_CHAR },
      { 4, 3, BIG_CHAR },
      { 5, 7, SMALL_CHAR },
      { 7, 5, SMALL_CHAR }
    };
  const size_t sweep_count = sizeof sweep / sizeof sweep[0];
  size_t n, k;

  test_init ();

  /* Header: blank padding over the per-test label column, then one
     tab-separated column per implementation.  */
  printf ("%23s", "");
  FOR_EACH_IMPL (impl, 0)
    printf ("\t%s", impl->name);
  putchar ('\n');

  /* Short strings, lengths 0..15.  */
  for (n = 0; n < 16; ++n)
    {
      do_test (0, 0, n, SMALL_CHAR);
      do_test (0, 0, n, BIG_CHAR);
      do_test (0, n, n, SMALL_CHAR);
      do_test (n, 0, n, BIG_CHAR);
    }

  /* Power-of-two lengths 16..1024, first batch.  */
  for (n = 1; n < 8; ++n)
    {
      size_t len = 8 << n;

      do_test (0, 0, len, SMALL_CHAR);
      do_test (8 - n, 2 * n, len, SMALL_CHAR);
    }

  /* Power-of-two lengths 16..1024, second batch.  */
  for (n = 1; n < 8; ++n)
    {
      size_t len = 8 << n;

      do_test (n, 2 * n, len, SMALL_CHAR);
      do_test (2 * n, n, len, BIG_CHAR);
      do_test (n, n, len, SMALL_CHAR);
      do_test (n, n, len, BIG_CHAR);
    }

  /* Lengths 16..512 in steps of 4, each with every entry of SWEEP.  */
  for (n = 16; n <= 512; n += 4)
    for (k = 0; k < sweep_count; ++k)
      do_test (sweep[k].align1, sweep[k].align2, n, sweep[k].max_char);

  return ret;
}
#include "../test-skeleton.c"