Add a new benchmark for isinf/isnan/isnormal/isfinite/fpclassify. The test uses 2 arrays with 1024 doubles, one with 99% finite FP numbers (10% zeroes, 10% negative) and 1% inf/NaN, the other with 50% inf and 50% NaN.

ChangeLog:
2015-09-18  Wilco Dijkstra  <wdijkstr@arm.com>

	* benchtests/Makefile: Add bench-math-inlines, link with libm.
	* benchtests/bench-math-inlines.c: New benchmark.
	* benchtests/bench-util.h: New file.
	* benchtests/bench-util.c: New file.
	* benchtests/bench-skeleton.c: Add include of bench-util.c/h.
This commit is contained in:
Wilco Dijkstra 2015-09-18 16:02:38 +01:00
parent e2b918d05d
commit cb2f668d46
6 changed files with 363 additions and 16 deletions

View File

@ -1,3 +1,11 @@
2015-09-18 Wilco Dijkstra <wdijkstr@arm.com>
* benchtests/Makefile: Add bench-math-inlines, link with libm.
* benchtests/bench-math-inlines.c: New benchmark.
* benchtests/bench-util.h: New file.
* benchtests/bench-util.c: New file.
* benchtests/bench-skeleton.c: Add include of bench-util.c/h.
2015-09-18 Carlos O'Donell <carlos@redhat.com>
* elf/tst-dlmopen1.c: Define TEST_SO.

View File

@ -53,7 +53,10 @@ stdlib-bench := strtod
stdio-common-bench := sprintf
benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common-bench)
math-benchset := math-inlines
benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common-bench) \
$(math-benchset)
CFLAGS-bench-ffs.c += -fno-builtin
CFLAGS-bench-ffsll.c += -fno-builtin
@ -61,6 +64,7 @@ CFLAGS-bench-ffsll.c += -fno-builtin
bench-malloc := malloc-thread
$(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
$(addprefix $(objpfx)bench-,$(math-benchset)): $(libm)
$(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread-library)
$(objpfx)bench-malloc-thread: $(shared-thread-library)

View File

@ -0,0 +1,285 @@
/* Measure math inline functions.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#define SIZE 1024
#define TEST_MAIN
#define TEST_NAME "math-inlines"
#define TEST_FUNCTION test_main ()
#include "bench-timing.h"
#include "json-lib.h"
#include "bench-util.h"
#include <stdlib.h>
#include <math.h>
#include <stdint.h>
/* BOOLTEST (func) generates two functions for a predicate-style FUNC
   (<func> below stands for the macro argument):

   <func>_f (d, i): a noinline wrapper that evaluates func (d) and returns
   (int) d + i when the result is nonzero, and 5 otherwise.

   <func>_t (p, n, iters): makes ITERS passes over the N elements of P,
   calls <func>_f (p[i] * 2.0, i) for each element and adds 5 to the
   returned total whenever that call yields a nonzero value.  P is
   volatile-qualified, so each element is loaded again on every pass.

   NOTE(review): when func (d) is true for a NaN or an infinity, (int) d
   converts a non-finite value to int, which ISO C leaves undefined;
   confirm this is acceptable for a benchmark-only helper.
   NOTE(review): the loop counters are int while N and ITERS are size_t,
   so the comparisons mix signedness; harmless for the sizes used in this
   file.  */
#define BOOLTEST(func) \
static int __attribute__((noinline)) \
func ## _f (double d, int i) \
{ \
if (func (d)) \
return (int) d + i; \
else \
return 5; \
} \
static int \
func ## _t (volatile double *p, size_t n, size_t iters) \
{ \
int i, j; \
int res = 0; \
for (j = 0; j < iters; j++) \
for (i = 0; i < n; i++) \
if (func ## _f (p[i] * 2.0, i) != 0) \
res += 5; \
return res; \
}
/* VALUETEST (func) generates two functions for a FUNC whose result is used
   as a value (<func> below stands for the macro argument):

   <func>_f (d): a noinline wrapper returning func (d) converted to int.

   <func>_t (p, n, iters): makes ITERS passes over the N elements of P and
   returns the sum of <func>_f (p[i] * 2.0) over all of them.  P is
   volatile-qualified, so each element is loaded again on every pass.

   NOTE(review): the loop counters are int while N and ITERS are size_t,
   so the comparisons mix signedness; harmless for the sizes used in this
   file.  */
#define VALUETEST(func) \
static int __attribute__((noinline)) \
func ## _f (double d) \
{ \
return func (d); \
} \
static int \
func ## _t (volatile double *p, size_t n, size_t iters) \
{ \
int i, j; \
int res = 0; \
for (j = 0; j < iters; j++) \
for (i = 0; i < n; i++) \
res += func ## _f (p[i] * 2.0); \
return res; \
}
/* Overlay giving access to the 64-bit representation of a double without
   a pointer cast.  */
typedef union
{
  double value;
  uint64_t word;
} ieee_double_shape_type;

/* Copy the 64-bit representation of the double D into the integer I.  */
#define EXTRACT_WORDS64(i,d) \
  do \
    { \
      ieee_double_shape_type gh_u; \
      gh_u.value = (d); \
      (i) = gh_u.word; \
    } \
  while (0)

/* Inlines similar to existing math_private.h versions.  Each one clears
   the top (sign) bit of D's representation and compares the remaining 63
   bits with 0x7ff0000000000000, which is the representation of +infinity
   assuming IEEE 754 binary64 doubles.  */

/* Return 1 if D is a NaN, i.e. its magnitude bits are above the infinity
   pattern, and 0 otherwise.  */
static __always_inline int
__isnan_inl (double d)
{
  uint64_t bits;

  EXTRACT_WORDS64 (bits, d);
  bits &= 0x7fffffffffffffffull;
  return bits > 0x7ff0000000000000ull;
}

/* Return 1 if D is an infinity of either sign, and 0 otherwise.  The sign
   is not reported.  */
static __always_inline int
__isinf_ns2 (double d)
{
  uint64_t bits;

  EXTRACT_WORDS64 (bits, d);
  bits &= 0x7fffffffffffffffull;
  return bits == 0x7ff0000000000000ull;
}

/* Return 1 if D is neither an infinity nor a NaN, i.e. its magnitude bits
   are below the infinity pattern, and 0 otherwise.  */
static __always_inline int
__finite_inl (double d)
{
  uint64_t bits;

  EXTRACT_WORDS64 (bits, d);
  bits &= 0x7fffffffffffffffull;
  return bits < 0x7ff0000000000000ull;
}
/* True when __fpclassify reports x as FP_NORMAL.  */
#define __isnormal_inl(x) (__fpclassify (x) == FP_NORMAL)

/* Inlines for the builtin functions.  Each macro forwards its argument to
   the compiler builtin named in its expansion; __isinf_ns_builtin uses
   __builtin_isinf while __isinf_builtin uses __builtin_isinf_sign.  */
#define __isnan_builtin(x) __builtin_isnan (x)
#define __isinf_ns_builtin(x) __builtin_isinf (x)
#define __isinf_builtin(x) __builtin_isinf_sign (x)
#define __isfinite_builtin(x) __builtin_isfinite (x)
#define __isnormal_builtin(x) __builtin_isnormal (x)

/* __builtin_fpclassify takes the values to return for NaN, infinite,
   normal, subnormal and zero, in that order, followed by the operand.  */
#define __fpclassify_builtin(x) \
  __builtin_fpclassify (FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, \
                        FP_ZERO, (x))
/* Out-of-line helper called by the remainder tests on their special-case
   path.  Returns X * Y + Z; it is kept noinline so the call is a real
   function call.  */
static double __attribute__ ((noinline))
kernel_standard (double x, double y, int z)
{
  const double addend = z;

  /* Keep the multiply and add in one expression, as in a plain
     x * y + z.  */
  return x * y + addend;
}
/* Divisor used by the remainder tests.  It is volatile, so its value is
   loaded at run time on every call.  */
volatile double rem1 = 2.5;

/* remainder (X, rem1) preceded by argument checks written with the
   bit-pattern inlines.  kernel_standard (x, y, 10) is returned when Y is
   zero and X is not a NaN, or when X is infinite and Y is not a NaN; both
   cases are marked as unlikely.  Otherwise the result is
   remainder (x, y).  */
static __always_inline double
remainder_test1 (double x)
{
  const double y = rem1;

  if (__builtin_expect (y == 0.0, 0) && !__isnan_inl (x))
    return kernel_standard (x, y, 10);

  if (__builtin_expect (__isinf_ns2 (x), 0) && !__isnan_inl (y))
    return kernel_standard (x, y, 10);

  return remainder (x, y);
}
/* Same computation as remainder_test1, but the argument checks use the
   compiler builtins __builtin_isnan and __builtin_isinf.
   kernel_standard (x, y, 10) is returned when Y (read from rem1) is zero
   and X is not a NaN, or when X is infinite and Y is not a NaN; otherwise
   the result is remainder (x, y).  */
static __always_inline double
remainder_test2 (double x)
{
  const double y = rem1;

  if (__builtin_expect (y == 0.0, 0) && !__builtin_isnan (x))
    return kernel_standard (x, y, 10);

  if (__builtin_expect (__builtin_isinf (x), 0) && !__builtin_isnan (y))
    return kernel_standard (x, y, 10);

  return remainder (x, y);
}
/* Create test functions for each possibility.  Every line below expands to
   a <name>_f wrapper and a <name>_t loop; the <name>_t functions are the
   ones referenced from test_list.  Names without a local definition in
   this file (__isnan, isnan, __isinf, isinf, __finite, isfinite, isnormal,
   __fpclassify, fpclassify) are presumably supplied by <math.h> -- verify
   against the headers in use.  */
/* NaN checks.  */
BOOLTEST (__isnan)
BOOLTEST (__isnan_inl)
BOOLTEST (__isnan_builtin)
BOOLTEST (isnan)
/* Infinity checks.  */
BOOLTEST (__isinf)
BOOLTEST (__isinf_builtin)
BOOLTEST (__isinf_ns2)
BOOLTEST (__isinf_ns_builtin)
BOOLTEST (isinf)
/* Finiteness checks.  */
BOOLTEST (__finite)
BOOLTEST (__finite_inl)
BOOLTEST (__isfinite_builtin)
BOOLTEST (isfinite)
/* Normal-number checks.  */
BOOLTEST (__isnormal_inl)
BOOLTEST (__isnormal_builtin)
BOOLTEST (isnormal)
/* Functions whose return value is accumulated rather than tested.  */
VALUETEST (__fpclassify)
VALUETEST (__fpclassify_builtin)
VALUETEST (fpclassify)
VALUETEST (remainder_test1)
VALUETEST (remainder_test2)
/* Signature shared by every generated <name>_t benchmark loop.  */
typedef int (*proto_t) (volatile double *p, size_t n, size_t iters);

/* One benchmark entry: the label written to the output and the loop that
   is timed under that label.  */
typedef struct
{
  const char *name;
  proto_t fn;
} impl_t;

/* Build the entry for FUNC from its stringified name and FUNC_t.  The
   parameter is deliberately not called "name" so that it cannot replace
   the .name designator.  */
#define IMPL(func) { .name = #func, .fn = func ## _t }

/* Every implementation that is benchmarked, in output order.  */
static impl_t test_list[] =
{
  /* NaN checks.  */
  IMPL (__isnan),
  IMPL (__isnan_inl),
  IMPL (__isnan_builtin),
  IMPL (isnan),

  /* Infinity checks.  */
  IMPL (__isinf),
  IMPL (__isinf_ns2),
  IMPL (__isinf_ns_builtin),
  IMPL (__isinf_builtin),
  IMPL (isinf),

  /* Finiteness checks.  */
  IMPL (__finite),
  IMPL (__finite_inl),
  IMPL (__isfinite_builtin),
  IMPL (isfinite),

  /* Normal-number checks.  */
  IMPL (__isnormal_inl),
  IMPL (__isnormal_builtin),
  IMPL (isnormal),

  /* Value-returning functions.  */
  IMPL (__fpclassify),
  IMPL (__fpclassify_builtin),
  IMPL (fpclassify),
  IMPL (remainder_test1),
  IMPL (remainder_test2)
};
/* Time a single call of TEST_FN over the LEN doubles in ARR and write the
   result to JSON_CTX as an object named TESTNAME with three attributes:
   "duration" (the measured time difference), "iterations" (the pass count
   handed to TEST_FN, fixed at 500) and "mean" (duration divided by
   iterations).  The value returned by TEST_FN is not used.  */
static void
do_one_test (json_ctx_t *json_ctx, proto_t test_fn, volatile double *arr,
             size_t len, const char *testname)
{
  size_t iters = 500;
  timing_t start, stop, cur;

  json_attr_object_begin (json_ctx, testname);

  TIMING_NOW (start);
  test_fn (arr, len, iters);
  TIMING_NOW (stop);
  TIMING_DIFF (cur, start, stop);

  json_attr_double (json_ctx, "duration", cur);
  json_attr_double (json_ctx, "iterations", iters);
  /* Divide in floating point.  timing_t is presumably an integer type
     (bench-timing.h is not visible here -- confirm); dividing it by the
     size_t ITERS directly would truncate the mean before it is converted
     to double.  */
  json_attr_double (json_ctx, "mean", (double) cur / (double) iters);

  json_attr_object_end (json_ctx);
}
/* Input data for the benchmarks: arr1 holds mostly finite values, arr2
   holds only infinities and NaNs.  Both are filled in by test_main.  */
static volatile double arr1[SIZE];
static volatile double arr2[SIZE];

/* Benchmark entry point.  Fills the two input arrays, then runs every
   entry of test_list on both of them and writes the timings as JSON to
   stdout.  Returns 0.

   The output has one object per function, holding an "inf/nan" and a
   "normal" sub-object.  Both data sets are emitted inside the same
   per-function object so that no function name is used as a key twice in
   the enclosing object; a JSON parser that keeps only one value per
   duplicate key would otherwise drop one of the two results.  */
int
test_main (void)
{
  json_ctx_t json_ctx;
  size_t i;

  bench_start ();

  json_init (&json_ctx, 2, stdout);
  json_attr_object_begin (&json_ctx, TEST_NAME);

  /* Create 2 test arrays, one with 10% zeroes, 10% negative values,
     79% positive values and 1% infinity/NaN.  The other contains
     50% inf, 50% NaN.  This relies on rand behaving correctly.  */
  for (i = 0; i < SIZE; i++)
    {
      /* X is uniform over 0..255 if rand is.  */
      int x = rand () & 255;

      arr1[i] = (x < 25) ? 0.0 : ((x < 50) ? -1 : 100);
      if (x == 255)
        arr1[i] = __builtin_inf ();
      if (x == 254)
        arr1[i] = __builtin_nan ("0");

      arr2[i] = (x < 128) ? __builtin_inf () : __builtin_nan ("0");
    }

  for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++)
    {
      json_attr_object_begin (&json_ctx, test_list[i].name);
      do_one_test (&json_ctx, test_list[i].fn, arr2, SIZE, "inf/nan");
      do_one_test (&json_ctx, test_list[i].fn, arr1, SIZE, "normal");
      json_attr_object_end (&json_ctx);
    }

  json_attr_object_end (&json_ctx);
  return 0;
}
#include "bench-util.c"
#include "../test-skeleton.c"

View File

@ -24,21 +24,9 @@
#include <inttypes.h>
#include "bench-timing.h"
#include "json-lib.h"
#include "bench-util.h"
/* Accumulator written by the warm-up loop; volatile so that the loop has a
   side effect the compiler must keep.  */
volatile unsigned int dontoptimize = 0;
/* Run a fixed-length busy loop before any measurement is taken.  */
void
startup (void)
{
/* This loop should cause the CPU to switch to its maximal frequency.
This makes subsequent measurements more accurate. We need a side effect
to prevent the loop from being deleted by the compiler.
This should be enough to cause the CPU to speed up and it is simpler than
running the loop for a constant time. This is used when the user does not
have root access to set a constant frequency. */
for (int k = 0; k < 10000000; k++)
dontoptimize += 23 * dontoptimize + 2;
}
#include "bench-util.c"
#define TIMESPEC_AFTER(a, b) \
(((a).tv_sec == (b).tv_sec) ? \
@ -56,7 +44,7 @@ main (int argc, char **argv)
if (argc == 2 && !strcmp (argv[1], "-d"))
detailed = true;
startup();
bench_start ();
memset (&runtime, 0, sizeof (runtime));

34
benchtests/bench-util.c Normal file
View File

@ -0,0 +1,34 @@
/* Benchmark utility functions.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Accumulator written by the warm-up loop; volatile so that the loop has a
   side effect the compiler must keep.  */
static volatile unsigned int dontoptimize = 0;
/* Run the warm-up loop for START_ITER iterations before any measurement is
   taken.  START_ITER is not defined in this file and must be defined
   before it is compiled.  */
void
bench_start (void)
{
/* This loop should cause the CPU to switch to its maximal frequency.
This makes subsequent measurements more accurate. We need a side effect
to prevent the loop from being deleted by the compiler.
This should be enough to cause the CPU to speed up and it is simpler than
running the loop for a constant time. This is used when the user does not
have root access to set a constant frequency. */
for (int k = 0; k < START_ITER; k++)
dontoptimize += 23 * dontoptimize + 2;
}

28
benchtests/bench-util.h Normal file
View File

@ -0,0 +1,28 @@
/* Benchmark utility functions.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Default number of warm-up iterations.  Define START_ITER before this
   header is included to use a different count.  */
#ifndef START_ITER
# define START_ITER (100000000)
#endif
/* bench_start reduces the random variations due to frequency scaling by
   executing a small loop with many memory accesses.  START_ITER controls
   the number of iterations.  It takes no arguments and returns nothing.  */
void bench_start (void);