Fix regressions in async-safe TLS, add run-time control for debugging, add more comments

This commit is contained in:
Stan Shebs 2018-08-03 11:24:51 -07:00 committed by Fangrui Song
parent c0ab16f8cc
commit 21991760c7
10 changed files with 196 additions and 47 deletions

View File

@ -530,12 +530,21 @@ __signal_safe_memalign (size_t boundary, size_t size)
/* Async-signal-safe malloc.  When run-time control has disabled
   async-signal-safe TLS handling, defer to the ordinary allocator;
   otherwise satisfy the request from the signal-safe pool with no
   extra alignment requirement.  */
void * weak_function
__signal_safe_malloc (size_t size)
{
  if (GLRO(dl_async_signal_safe))
    return __signal_safe_memalign (1, size);
  return malloc (size);
}
void weak_function
__signal_safe_free (void *ptr)
{
if (!GLRO(dl_async_signal_safe))
{
free (ptr);
return;
}
if (ptr == NULL)
return;
@ -549,6 +558,9 @@ __signal_safe_free (void *ptr)
void * weak_function
__signal_safe_realloc (void *ptr, size_t size)
{
if (!GLRO(dl_async_signal_safe))
return realloc (ptr, size);
if (size == 0)
{
__signal_safe_free (ptr);
@ -567,7 +579,8 @@ __signal_safe_realloc (void *ptr, size_t size)
if (new_ptr == NULL)
return NULL;
memcpy (new_ptr, ptr, old_size);
/* Copy over the old block (but not its header). */
memcpy (new_ptr, ptr, old_size - sizeof (*header));
__signal_safe_free (ptr);
return new_ptr;
@ -576,6 +589,9 @@ __signal_safe_realloc (void *ptr, size_t size)
void * weak_function
__signal_safe_calloc (size_t nmemb, size_t size)
{
if (!GLRO(dl_async_signal_safe))
return calloc (nmemb, size);
void *ptr = __signal_safe_malloc (nmemb * size);
if (ptr == NULL)
return NULL;

View File

@ -72,6 +72,9 @@ _dl_try_allocate_static_tls (struct link_map *map)
size_t offset = GL(dl_tls_static_used) + (freebytes - n * map->l_tls_align
- map->l_tls_firstbyte_offset);
if (!GLRO(dl_async_signal_safe))
map->l_tls_offset = GL(dl_tls_static_used) = offset;
#elif TLS_DTV_AT_TP
/* dl_tls_static_used includes the TCB at the beginning. */
size_t offset = (ALIGN_UP(GL(dl_tls_static_used)
@ -83,9 +86,15 @@ _dl_try_allocate_static_tls (struct link_map *map)
if (used > GL(dl_tls_static_size))
goto fail;
if (!GLRO(dl_async_signal_safe)) {
map->l_tls_offset = offset;
map->l_tls_firstbyte_offset = GL(dl_tls_static_used);
GL(dl_tls_static_used) = used;
}
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
if (GLRO(dl_async_signal_safe)) {
/* We've computed the new value we want, now try to install it. */
ptrdiff_t val;
if ((val = map->l_tls_offset) == NO_TLS_OFFSET)
@ -110,14 +119,15 @@ _dl_try_allocate_static_tls (struct link_map *map)
goto fail;
}
/* We installed the value; now update the globals. */
#if TLS_TCB_AT_TP
#if TLS_TCB_AT_TP // second
GL(dl_tls_static_used) = offset;
#elif TLS_DTV_AT_TP
#elif TLS_DTV_AT_TP // second
map->l_tls_firstbyte_offset = GL(dl_tls_static_used);
GL(dl_tls_static_used) = used;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
#else // second
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" // second
#endif // second
}
/* If the object is not yet relocated we cannot initialize the
static TLS region. Delay it. */
@ -147,12 +157,14 @@ _dl_allocate_static_tls (struct link_map *map)
(including this one) and update this map's TLS entry. A signal handler
accessing TLS would try to do the same update and break. */
sigset_t old;
_dl_mask_all_signals (&old);
if (GLRO(dl_async_signal_safe))
_dl_mask_all_signals (&old);
int err = -1;
if (map->l_tls_offset != FORCED_DYNAMIC_TLS_OFFSET)
err = _dl_try_allocate_static_tls (map);
_dl_unmask_signals (&old);
if (GLRO(dl_async_signal_safe))
_dl_unmask_signals (&old);
if (err != 0)
{
_dl_signal_error (0, map->l_name, NULL, N_("\

View File

@ -73,6 +73,9 @@ const char *_google_exec_origin_dir;
/* Nonzero if runtime lookup should not update the .got/.plt. */
int _dl_bind_not;
/* Nonzero if TLS handling should be async-signal-safe. */
int _dl_async_signal_safe;
/* A dummy link map for the executable, used by dlopen to access the global
scope. We don't export any symbols ourselves, so this can be minimal. */
static struct link_map _dl_main_map =

View File

@ -409,14 +409,14 @@ _dl_resize_dtv (dtv_t *dtv)
dl-minimal.c malloc instead of the real malloc. We can't free
it, we have to abandon the old storage. */
newp = malloc ((2 + newsize) * sizeof (dtv_t));
newp = __signal_safe_malloc ((2 + newsize) * sizeof (dtv_t));
if (newp == NULL)
oom ();
memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t));
}
else
{
newp = realloc (&dtv[-1],
newp = __signal_safe_realloc (&dtv[-1],
(2 + newsize) * sizeof (dtv_t));
if (newp == NULL)
oom ();
@ -534,13 +534,15 @@ _dl_allocate_tls (void *mem)
}
rtld_hidden_def (_dl_allocate_tls)
/* Clear the given dtv. (We have this here because __signal_safe_free is
not visible to nptl/allocatestack.c.) */
void
_dl_clear_dtv (dtv_t *dtv)
{
  /* Free every dynamically allocated entry.  The pointer originally
     returned by the allocator is kept in pointer.to_free; pointer.val
     may be an aligned pointer into the interior of that block, so it
     must never be passed to free — freeing .val here in addition to
     .to_free would both free an interior pointer and double-free the
     block.  */
  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
    if (/*! dtv[1 + cnt].pointer.is_static */ 1
        && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
      __signal_safe_free (dtv[1 + cnt].pointer.to_free);
  memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
}
@ -584,8 +586,53 @@ rtld_hidden_def (_dl_deallocate_tls)
# define GET_ADDR_OFFSET ti->ti_offset
# endif
/* Allocate one DTV entry. */
/* Allocate storage for one DTV entry of SIZE bytes aligned to
   ALIGNMENT.  The result carries both the usable (aligned) pointer
   (.val) and the pointer that must eventually be handed back to
   free (.to_free).  On allocation failure or size overflow both
   members are null.  */
static struct dtv_pointer
allocate_dtv_entry (size_t alignment, size_t size)
{
  /* malloc already guarantees alignment up to _Alignof (max_align_t),
     so no over-allocation is needed for such requests.  */
  if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
    {
      void *block = malloc (size);
      return (struct dtv_pointer) { .val = block, .to_free = block };
    }

  /* Emulate memalign by manually aligning a pointer returned by
     malloc: over-allocate by ALIGNMENT bytes, guarding against
     overflow of the padded size first.  */
  size_t padded_size = size + alignment;
  if (padded_size < size)
    return (struct dtv_pointer) {};

  /* Perform the allocation.  This is the pointer that must be freed
     later.  */
  void *block = malloc (padded_size);
  if (block == NULL)
    return (struct dtv_pointer) {};

  /* Locate the aligned position inside the larger allocation.  */
  void *aligned = (void *) roundup ((uintptr_t) block, alignment);
  return (struct dtv_pointer) { .val = aligned, .to_free = block };
}
static struct dtv_pointer
allocate_and_init (struct link_map *map)
{
struct dtv_pointer result = allocate_dtv_entry
(map->l_tls_align, map->l_tls_blocksize);
if (result.val == NULL)
oom ();
/* Initialize the memory. */
memset (__mempcpy (result.val, map->l_tls_initimage,
map->l_tls_initimage_size),
'\0', map->l_tls_blocksize - map->l_tls_initimage_size);
return result;
}
static void
allocate_and_init (dtv_t *dtv, struct link_map *map)
signal_safe_allocate_and_init (dtv_t *dtv, struct link_map *map)
{
void *newp;
newp = __signal_safe_memalign (map->l_tls_align, map->l_tls_blocksize);
@ -642,13 +689,18 @@ _dl_update_slotinfo (unsigned long int req_modid)
size_t total = 0;
sigset_t old;
_dl_mask_all_signals (&old);
/* We use the signal mask as a lock against reentrancy here.
Check that a signal taken before the lock didn't already
update us. */
dtv = THREAD_DTV ();
if (dtv[0].counter >= listp->slotinfo[idx].gen)
goto out;
if (GLRO(dl_async_signal_safe)) {
_dl_mask_all_signals (&old);
/* We use the signal mask as a lock against reentrancy here.
Check that a signal taken before the lock didn't already
update us. */
dtv = THREAD_DTV ();
if (dtv[0].counter >= listp->slotinfo[idx].gen)
{
_dl_unmask_signals (&old);
return the_map;
}
}
/* We have to look through the entire dtv slotinfo list. */
listp = GL(dl_tls_dtv_slotinfo_list);
do
@ -674,13 +726,13 @@ _dl_update_slotinfo (unsigned long int req_modid)
struct link_map *map = listp->slotinfo[cnt].map;
if (map == NULL)
{
if (dtv[-1].counter >= total + cnt)
if (dtv[-1].counter >= modid)
{
/* If this modid was used at some point the memory
might still be allocated. */
__signal_safe_free (dtv[total + cnt].pointer.to_free);
dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
dtv[total + cnt].pointer.to_free = NULL;
__signal_safe_free (dtv[modid].pointer.to_free);
dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
dtv[modid].pointer.to_free = NULL;
}
continue;
@ -718,8 +770,8 @@ _dl_update_slotinfo (unsigned long int req_modid)
/* This will be the new maximum generation counter. */
dtv[0].counter = new_gen;
out:
_dl_unmask_signals (&old);
if (GLRO(dl_async_signal_safe))
_dl_unmask_signals (&old);
}
return the_map;
@ -745,6 +797,50 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
the_map = listp->slotinfo[idx].map;
}
if (!GLRO(dl_async_signal_safe)) {
/* Make sure that, if a dlopen running in parallel forces the
variable into static storage, we'll wait until the address in the
static TLS block is set up, and use that. If we're undecided
yet, make sure we make the decision holding the lock as well. */
if (__glibc_unlikely (the_map->l_tls_offset
!= FORCED_DYNAMIC_TLS_OFFSET))
{
__rtld_lock_lock_recursive (GL(dl_load_lock));
if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
{
the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
__rtld_lock_unlock_recursive (GL(dl_load_lock));
}
else if (__glibc_likely (the_map->l_tls_offset
!= FORCED_DYNAMIC_TLS_OFFSET))
{
#if TLS_TCB_AT_TP
void *p = (char *) THREAD_SELF - the_map->l_tls_offset;
#elif TLS_DTV_AT_TP
void *p = (char *) THREAD_SELF + the_map->l_tls_offset + TLS_PRE_TCB_SIZE;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
__rtld_lock_unlock_recursive (GL(dl_load_lock));
dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
dtv[GET_ADDR_MODULE].pointer.val = p;
return (char *) p + GET_ADDR_OFFSET;
}
else
__rtld_lock_unlock_recursive (GL(dl_load_lock));
}
struct dtv_pointer result = allocate_and_init (the_map);
dtv[GET_ADDR_MODULE].pointer = result;
assert (result.to_free != NULL);
return (char *) result.val + GET_ADDR_OFFSET;
} else {
sigset_t old;
_dl_mask_all_signals (&old);
@ -778,11 +874,11 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
if (offset == FORCED_DYNAMIC_TLS_OFFSET)
{
allocate_and_init (&dtv[GET_ADDR_MODULE], the_map);
signal_safe_allocate_and_init (&dtv[GET_ADDR_MODULE], the_map);
}
else
{
void **pp = &dtv[GET_ADDR_MODULE].pointer.val;
void ** volatile pp = &dtv[GET_ADDR_MODULE].pointer.val;
while (atomic_forced_read (*pp) == TLS_DTV_UNALLOCATED)
{
/* for lack of a better (safe) thing to do, just spin.
@ -803,6 +899,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
_dl_unmask_signals (&old);
return (char *) dtv[GET_ADDR_MODULE].pointer.val + GET_ADDR_OFFSET;
}
}

View File

@ -2502,6 +2502,9 @@ process_envvars (enum mode *modep)
enum mode mode = normal;
char *debug_output = NULL;
/* Enable async-signal-safe TLS by default. */
GLRO(dl_async_signal_safe) = 1;
/* This is the default place for profiling data file. */
GLRO(dl_profile_output)
= &"/var/tmp\0/var/profile"[__libc_enable_secure ? 9 : 0];
@ -2525,6 +2528,10 @@ process_envvars (enum mode *modep)
/* Warning level, verbose or not. */
if (memcmp (envline, "WARN", 4) == 0)
GLRO(dl_verbose) = envline[5] != '\0';
#if 0 /* enable to get runtime control over async signal safety */
if (memcmp (envline, "SAFE", 4) == 0)
GLRO(dl_async_signal_safe) = 1;
#endif
break;
case 5:
@ -2537,7 +2544,12 @@ process_envvars (enum mode *modep)
if (memcmp (envline, "AUDIT", 5) == 0)
audit_list_string = &envline[6];
break;
#if 0 /* enable to get runtime control over async signal safety */
case 6:
if (memcmp (envline, "UNSAFE", 6) == 0)
GLRO(dl_async_signal_safe) = 0;
break;
#endif
case 7:
/* Print information about versions. */
if (memcmp (envline, "VERBOSE", 7) == 0)
@ -2666,6 +2678,16 @@ process_envvars (enum mode *modep)
/* The caller wants this information. */
*modep = mode;
#if 0 /* enable this to help debug async-safe TLS */
if (GLRO(dl_debug_mask))
{
if (GLRO(dl_async_signal_safe))
_dl_printf ("TLS is async-signal-safe\n");
else
_dl_printf ("TLS is NOT async-signal-safe\n");
}
#endif /* for async-safe TLS */
/* Extra security for SUID binaries. Remove all dangerous environment
variables. */
if (__builtin_expect (__libc_enable_secure, 0))

View File

@ -374,7 +374,7 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \
tst-oncex3 tst-oncex4
ifeq ($(build-shared),yes)
tests += tst-atfork2 tst-tls4 tst-_res1 tst-fini1 tst-compat-forwarder
tests += tst-tls7
tests += tst-tls7a
tests-internal += tst-tls3 tst-tls3-malloc tst-tls5 tst-stackguard1
tests-nolibpthread += tst-fini1
ifeq ($(have-z-execstack),yes)
@ -390,7 +390,7 @@ modules-names = tst-atfork2mod tst-tls3mod tst-tls4moda tst-tls4modb \
tst-tls5modd tst-tls5mode tst-tls5modf tst-stack4mod \
tst-_res1mod1 tst-_res1mod2 tst-fini1mod \
tst-join7mod tst-compat-forwarder-mod
modules-names += tst-tls7mod
modules-names += tst-tls7amod
ifneq ($(with-clang),yes)
modules-names += tst-execstack-mod
endif
@ -408,7 +408,7 @@ tst-tls5modc.so-no-z-defs = yes
tst-tls5modd.so-no-z-defs = yes
tst-tls5mode.so-no-z-defs = yes
tst-tls5modf.so-no-z-defs = yes
tst-tls7mod.so-no-z-defs = yes
tst-tls7amod.so-no-z-defs = yes
ifeq ($(build-shared),yes)
# Build all the modules even when not actually running test programs.
@ -603,11 +603,9 @@ $(objpfx)tst-tls5: $(objpfx)tst-tls5mod.so $(shared-thread-library)
LDFLAGS-tst-tls5 = $(no-as-needed)
LDFLAGS-tst-tls5mod.so = -Wl,-soname,tst-tls5mod.so
# ensure free(malloc()) isn't optimized out
CFLAGS-tst-tls7.c = -fno-builtin-malloc -fno-builtin-free
$(objpfx)tst-tls7: $(libdl) $(shared-thread-library)
$(objpfx)tst-tls7.out: $(objpfx)tst-tls7mod.so
$(objpfx)tst-tls7mod.so: $(shared-thread-library)
$(objpfx)tst-tls7a: $(libdl) $(shared-thread-library)
$(objpfx)tst-tls7a.out: $(objpfx)tst-tls7amod.so
$(objpfx)tst-tls7amod.so: $(shared-thread-library)
ifeq ($(build-shared),yes)
$(objpfx)tst-tls6.out: tst-tls6.sh $(objpfx)tst-tls5 \

View File

@ -1231,6 +1231,7 @@ __nptl_setxid (struct xid_command *cmdp)
static inline void __attribute__((always_inline))
init_one_static_tls (struct pthread *curp, struct link_map *map)
{
dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
# if TLS_TCB_AT_TP
void *dest = (char *) curp - map->l_tls_offset;
# elif TLS_DTV_AT_TP
@ -1243,15 +1244,11 @@ init_one_static_tls (struct pthread *curp, struct link_map *map)
memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
'\0', map->l_tls_blocksize - map->l_tls_initimage_size);
#if 0 /* still needed? dtv refs gone in current code */
/* Fill in the DTV slot so that a later LD/GD access will find it. */
dtv[map->l_tls_modid].pointer.is_static = true;
/* Pairs against the read barrier in tls_get_addr_tail, guaranteeing
any thread waiting for an update to pointer.val sees the
initimage write. */
atomic_write_barrier ();
dtv[map->l_tls_modid].pointer.val = dest;
#endif
}
void

View File

@ -20,7 +20,6 @@
from a signal handler. */
#include <assert.h>
#include <atomic.h>
#include <dlfcn.h>
#include <pthread.h>
#include <semaphore.h>
@ -34,7 +33,9 @@ spin (void *ignored)
while (1)
{
/* busywork */
free (malloc (128));
void *volatile p;
p = malloc (128);
free (p);
}
/* never reached */
@ -48,7 +49,7 @@ action (int signo, siginfo_t *info, void *ignored)
{
sem_t *sem = info->si_value.sival_ptr;
atomic_read_barrier ();
__asm ("" ::: "memory"); // atomic_read_barrier
assert (tls7mod_action != NULL);
(*tls7mod_action) (signo, info, ignored);
@ -75,7 +76,7 @@ do_test (void)
for (int i = 0; i < NITERS; ++i)
{
void *h = dlopen ("tst-tls7mod.so", RTLD_LAZY);
void *h = dlopen ("tst-tls7amod.so", RTLD_LAZY);
if (h == NULL)
{
puts ("dlopen failed");
@ -88,7 +89,7 @@ do_test (void)
puts ("dlsym for action failed");
exit (1);
}
atomic_write_barrier ();
__asm ("" ::: "memory"); // atomic_write_barrier
struct sigaction sa;
sa.sa_sigaction = action;

View File

@ -521,6 +521,9 @@ struct rtld_global_ro
linking. */
EXTERN int _dl_dynamic_weak;
/* Nonzero if TLS handling should be async-signal-safe. */
EXTERN int _dl_async_signal_safe;
/* Default floating-point control word. */
EXTERN fpu_control_t _dl_fpu_control;