#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_barrier_t b;
static pthread_cond_t c = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

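
/* Cancellation cleanup handler: release the mutex that
   pthread_cond_wait re-acquires before the cancellation is acted
   upon.  */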
static void
cl (void *arg)
{
  pthread_mutex_unlock (&m);
}

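
/* Waiter thread: blocks in pthread_cond_wait.  With ARG == NULL it
   keeps waiting (and can only leave via cancellation); otherwise it
   returns after the first wakeup.  */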
static void *
tf (void *arg)
{
  if (pthread_mutex_lock (&m) != 0)
    {
      printf ("%s: mutex_lock failed\n", __func__);
      exit (1);
    }

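  /* Wait until the main thread is ready to signal and cancel.  */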
  int e = pthread_barrier_wait (&b);
  if (e != 0 && e != PTHREAD_BARRIER_SERIAL_THREAD)
    {
      printf ("%s: barrier_wait failed\n", __func__);
      exit (1);
    }

  pthread_cleanup_push (cl, NULL);

  /* We have to loop here because the cancellation might come after
     the cond_wait call left the cancelable area and is then waiting
     on the mutex.  In this case the beginning of the second cond_wait
     call will cause the cancellation to happen.  */
  do
    if (pthread_cond_wait (&c, &m) != 0)
      {
        printf ("%s: cond_wait failed\n", __func__);
        exit (1);
      }
  while (arg == NULL);

  pthread_cleanup_pop (0);

  if (pthread_mutex_unlock (&m) != 0)
    {
      printf ("%s: mutex_unlock failed\n", __func__);
      exit (1);
    }

  return NULL;
}


static int
do_test (void)
{
  int status = 0;

  if (pthread_barrier_init (&b, NULL, 2) != 0)
    {
      puts ("barrier_init failed");
      return 1;
    }

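  /* First round: ARG == NULL makes the child wait until it is
     canceled; pthread_join must report PTHREAD_CANCELED.  */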
  pthread_t th;
  if (pthread_create (&th, NULL, tf, NULL) != 0)
    {
      puts ("1st create failed");
      return 1;
    }

  int e = pthread_barrier_wait (&b);
  if (e != 0 && e != PTHREAD_BARRIER_SERIAL_THREAD)
    {
      puts ("1st barrier_wait failed");
      return 1;
    }

  if (pthread_mutex_lock (&m) != 0)
    {
      puts ("1st mutex_lock failed");
      return 1;
    }

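  /* Signal and cancel while still holding the mutex, so the waiter
     cannot consume the signal and leave pthread_cond_wait before the
     cancellation request is pending.  */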
  if (pthread_cond_signal (&c) != 0)
    {
      puts ("1st cond_signal failed");
      return 1;
    }

  if (pthread_cancel (th) != 0)
    {
      puts ("cancel failed");
      return 1;
    }

  if (pthread_mutex_unlock (&m) != 0)
    {
      puts ("1st mutex_unlock failed");
      return 1;
    }

  void *res;
  if (pthread_join (th, &res) != 0)
    {
      puts ("1st join failed");
      return 1;
    }

  if (res != PTHREAD_CANCELED)
    {
      puts ("first thread not canceled");
      status = 1;
    }
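
  /* Dump the condvar's internal state; with the current condvar
     implementation, __wseq and __g1_start are 64-bit counters printed
     as high:low 32-bit halves.  */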
  printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n",
          c.__data.__wseq.__value32.__high,
          c.__data.__wseq.__value32.__low,
          c.__data.__g1_start.__value32.__high,
          c.__data.__g1_start.__value32.__low,
          c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0],
          c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1],
          c.__data.__g1_orig_size, c.__data.__wrefs);

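  /* Second round: a nonzero ARG makes the child return after the first
     wakeup; no cancellation is sent, so the thread must return NULL.  */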
  if (pthread_create (&th, NULL, tf, (void *) 1l) != 0)
    {
      puts ("2nd create failed");
      return 1;
    }

  e = pthread_barrier_wait (&b);
  if (e != 0 && e != PTHREAD_BARRIER_SERIAL_THREAD)
    {
      puts ("2nd barrier_wait failed");
      return 1;
    }

  if (pthread_mutex_lock (&m) != 0)
    {
      puts ("2nd mutex_lock failed");
      return 1;
    }

  if (pthread_cond_signal (&c) != 0)
    {
      puts ("2nd cond_signal failed");
      return 1;
    }

  if (pthread_mutex_unlock (&m) != 0)
    {
      puts ("2nd mutex_unlock failed");
      return 1;
    }

  if (pthread_join (th, &res) != 0)
    {
      puts ("2nd join failed");
      return 1;
    }

  if (res != NULL)
    {
      puts ("2nd thread canceled");
      status = 1;
    }
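
  /* Dump the internal state again after the second round.  */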
  printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n",
          c.__data.__wseq.__value32.__high,
          c.__data.__wseq.__value32.__low,
          c.__data.__g1_start.__value32.__high,
          c.__data.__g1_start.__value32.__low,
          c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0],
          c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1],
          c.__data.__g1_orig_size, c.__data.__wrefs);

  return status;
}

#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"