From cbd9fd236981717d3d4ee942986ea912e9707c32 Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Thu, 16 Jan 2025 20:02:42 +0100 Subject: [PATCH] Consolidate TLS block allocation for static binaries with ld.so Use the same code to compute the TLS block size and its alignment. The code in elf/dl-tls.c is linked in anyway for all binaries due to the reference to _dl_tls_static_surplus_init. It is not possible to call _dl_allocate_tls_storage directly because malloc is not available in the static case. (The dynamic linker uses the minimal malloc at this stage.) Therefore, split _dl_tls_block_size_with_pre and _dl_tls_block_align from _dl_allocate_tls_storage, and call those new functions from __libc_setup_tls. This fixes extra TLS allocation for the static case, and apparently some pre-existing bugs as well (the independent recomputation of TLS block sizes in init_static_tls looks rather suspect). Fixes commit 0e411c5d3098982d67cd2d7a233eaa6c977a1869 ("Add generic 'extra TLS'"). --- csu/libc-tls.c | 193 ++++++++------------------------------- elf/dl-tls.c | 50 ++-------- elf/dl-tls_block_align.h | 70 ++++++++++++++ 3 files changed, 114 insertions(+), 199 deletions(-) create mode 100644 elf/dl-tls_block_align.h diff --git a/csu/libc-tls.c b/csu/libc-tls.c index 5ffebc6fc2..07c37f3af9 100644 --- a/csu/libc-tls.c +++ b/csu/libc-tls.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #ifdef SHARED #error makefile bug, this file is for static only @@ -74,7 +76,7 @@ TLS_INIT_HELPER #endif static void -init_slotinfo (void) +init_slotinfo (struct link_map *main_map) { /* Create the slotinfo list. Note that the type of static_slotinfo has effectively a zero-length array, so we cannot use the size of @@ -82,39 +84,26 @@ init_slotinfo (void) static_slotinfo.len = array_length (_dl_static_dtv); /* static_slotinfo.next = NULL; -- Already zero. 
*/ + main_map->l_tls_modid = 1; + static_slotinfo.slotinfo[1].map = main_map; + main_map->l_tls_in_slotinfo = 1; + /* The slotinfo list. Will be extended by the code doing dynamic linking. */ GL(dl_tls_max_dtv_idx) = 1; GL(dl_tls_dtv_slotinfo_list) = &static_slotinfo; } -static void -init_static_tls (size_t memsz, size_t align) -{ - /* That is the size of the TLS memory for this object. */ - GL(dl_tls_static_size) = roundup (memsz + GLRO(dl_tls_static_surplus), - TCB_ALIGNMENT); -#if TLS_TCB_AT_TP - GL(dl_tls_static_size) += TLS_TCB_SIZE; -#endif - GL(dl_tls_static_used) = memsz; - /* The alignment requirement for the static TLS block. */ - GL(dl_tls_static_align) = align; - /* Number of elements in the static TLS block. */ - GL(dl_tls_static_nelem) = GL(dl_tls_max_dtv_idx); -} - +/* Perform TLS setup for statically linked binaries. Similar to + init_tls in elf/rtld.c. */ void __libc_setup_tls (void) { - void *tlsblock; size_t memsz = 0; size_t filesz = 0; void *initimage = NULL; size_t align = 0; - size_t tls_blocks_size = 0; size_t max_align = TCB_ALIGNMENT; - size_t tcb_offset; const ElfW(Phdr) *phdr; struct link_map *main_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded; @@ -132,152 +121,44 @@ __libc_setup_tls (void) align = phdr->p_align; if (phdr->p_align > max_align) max_align = phdr->p_align; + + main_map->l_tls_align = align; + main_map->l_tls_blocksize = memsz; + main_map->l_tls_initimage = initimage; + main_map->l_tls_initimage_size = filesz; + init_slotinfo (main_map); break; } + /* Number of elements in the static TLS block. */ + GL(dl_tls_static_nelem) = GL(dl_tls_max_dtv_idx); + /* Calculate the size of the static TLS surplus, with 0 auditors. */ _dl_tls_static_surplus_init (0); - /* Extra TLS block for internal usage to append at the end of the TLS blocks - (in allocation order). The address at which the block is allocated must - be aligned to 'extra_tls_align'. 
The size of the block as returned by - '_dl_extra_tls_get_size ()' is always a multiple of the aligment. + /* Calculate the TLS block size. */ + _dl_determine_tlsoffset (); - On Linux systems this is where the rseq area will be allocated. On other - systems it is currently unused and both values will be '0'. */ - size_t extra_tls_size = _dl_extra_tls_get_size (); - size_t extra_tls_align = _dl_extra_tls_get_align (); - - /* Increase the maximum alignment with the extra TLS alignment requirements - if necessary. */ - max_align = MAX (max_align, extra_tls_align); - - /* We have to set up the TCB block which also (possibly) contains - 'errno'. Therefore we avoid 'malloc' which might touch 'errno'. - Instead we use 'sbrk' which would only uses 'errno' if it fails. - In this case we are right away out of memory and the user gets - what she/he deserves. */ -#if TLS_TCB_AT_TP - /* In this layout the TLS blocks are located before the thread pointer. */ - - /* Record the size of the combined TLS blocks. - - First reserve space for 'memsz' while respecting both its alignment - requirements and those of the extra TLS blocks. Then add the size of - the extra TLS block. Both values respect the extra TLS alignment - requirements and so does the resulting size and the offset that will - be derived from it. */ - tls_blocks_size = roundup (memsz, MAX (align, extra_tls_align) ?: 1) - + extra_tls_size; - - /* Record the extra TLS block offset from the thread pointer. - - With TLS_TCB_AT_TP the TLS blocks are allocated before the thread pointer - in reverse order. Our block is added last which results in it being the - first in the static TLS block, thus record the most negative offset. - - The alignment requirements of the pointer resulting from this offset and - the thread pointer are enforced by 'max_align' which is used to align the - tcb_offset. 
*/ - _dl_extra_tls_set_offset (-tls_blocks_size); - - /* Align the TCB offset to the maximum alignment, as - _dl_allocate_tls_storage (in elf/dl-tls.c) does using __libc_memalign - and dl_tls_static_align. */ - tcb_offset = roundup (tls_blocks_size + GLRO(dl_tls_static_surplus), max_align); - tlsblock = _dl_early_allocate (tcb_offset + TLS_INIT_TCB_SIZE + max_align); - if (tlsblock == NULL) - _startup_fatal_tls_error (); -#elif TLS_DTV_AT_TP - /* In this layout the TLS blocks are located after the thread pointer. */ - - /* Record the tcb_offset including the aligment requirements of 'memsz' - that comes after it. */ - tcb_offset = roundup (TLS_INIT_TCB_SIZE, align ?: 1); - - /* Record the size of the combined TLS blocks. - - First reserve space for TLS_INIT_TCB_SIZE and 'memsz' while respecting - both its alignment requirements and those of the extra TLS blocks. Then - add the size of the extra TLS block. Both values respect the extra TLS - alignment requirements and so does the resulting size and the offset that - will be derived from it. */ - tls_blocks_size = roundup (TLS_INIT_TCB_SIZE + memsz, - MAX (align, extra_tls_align) ?: 1) + extra_tls_size; - - /* Record the extra TLS block offset from the thread pointer. - - With TLS_DTV_AT_TP the TLS blocks are allocated after the thread pointer in - order. Our block is added last which results in it being the last in the - static TLS block, thus record the offset as the size of the static TLS - block minus the size of our block. - - On some architectures the TLS blocks are offset from the thread pointer, - include this offset in the extra TLS block offset. - - The alignment requirements of the pointer resulting from this offset and - the thread pointer are enforced by 'max_align' which is used to align the - tcb_offset. 
*/ - _dl_extra_tls_set_offset (tls_blocks_size - extra_tls_size - TLS_TP_OFFSET); - - tlsblock = _dl_early_allocate (tls_blocks_size + max_align - + TLS_PRE_TCB_SIZE - + GLRO(dl_tls_static_surplus)); - if (tlsblock == NULL) - _startup_fatal_tls_error (); - tlsblock += TLS_PRE_TCB_SIZE; -#else - /* In case a model with a different layout for the TCB and DTV - is defined add another #elif here and in the following #ifs. */ -# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" -#endif - - /* Align the TLS block. */ - tlsblock = (void *) (((uintptr_t) tlsblock + max_align - 1) - & ~(max_align - 1)); + /* See _dl_allocate_tls_storage in elf/dl-tls.c. */ + void *tcbp; + { + size_t size = _dl_tls_block_size_with_pre (); + void *allocated = _dl_early_allocate (size + GLRO (dl_tls_static_align)); + if (allocated == NULL) + _startup_fatal_tls_error (); + tcbp = _dl_tls_block_align (size, allocated); + } /* Initialize the dtv. [0] is the length, [1] the generation counter. */ - _dl_static_dtv[0].counter = (sizeof (_dl_static_dtv) / sizeof (_dl_static_dtv[0])) - 2; - // _dl_static_dtv[1].counter = 0; would be needed if not already done + _dl_static_dtv[0].counter = array_length (_dl_static_dtv) - 2; - /* Initialize the TLS block. */ -#if TLS_TCB_AT_TP - _dl_static_dtv[2].pointer.val = ((char *) tlsblock + tcb_offset - - roundup (memsz, align ?: 1)); - main_map->l_tls_offset = roundup (memsz, align ?: 1); -#elif TLS_DTV_AT_TP - _dl_static_dtv[2].pointer.val = (char *) tlsblock + tcb_offset; - main_map->l_tls_offset = tcb_offset; -#else -# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" -#endif - _dl_static_dtv[2].pointer.to_free = NULL; - /* sbrk gives us zero'd memory, so we don't need to clear the remainder. */ - memcpy (_dl_static_dtv[2].pointer.val, initimage, filesz); + /* Install the pointer to the DTV. See allocate_dtv in elf/dl-tls.c. */ + INSTALL_DTV (tcbp, _dl_static_dtv); - /* Install the pointer to the dtv. 
*/ + /* _dl_allocate_tls_init uses recursive locking and the TCB, so this + has to come first. */ + call_tls_init_tp (tcbp); - /* Initialize the thread pointer. */ -#if TLS_TCB_AT_TP - INSTALL_DTV ((char *) tlsblock + tcb_offset, _dl_static_dtv); - - call_tls_init_tp ((char *) tlsblock + tcb_offset); -#elif TLS_DTV_AT_TP - INSTALL_DTV (tlsblock, _dl_static_dtv); - call_tls_init_tp (tlsblock); -#endif - - /* Update the executable's link map with enough information to make - the TLS routines happy. */ - main_map->l_tls_align = align; - main_map->l_tls_blocksize = memsz; - main_map->l_tls_initimage = initimage; - main_map->l_tls_initimage_size = filesz; - main_map->l_tls_modid = 1; - - init_slotinfo (); - /* static_slotinfo.slotinfo[1].gen = 0; -- Already zero. */ - static_slotinfo.slotinfo[1].map = main_map; - - init_static_tls (tls_blocks_size, MAX (TCB_ALIGNMENT, max_align)); + /* Initialize the TLS image for the allocated TCB. */ + _dl_allocate_tls_init (tcbp, true); } diff --git a/elf/dl-tls.c b/elf/dl-tls.c index 647deaf6d0..8306a39e8d 100644 --- a/elf/dl-tls.c +++ b/elf/dl-tls.c @@ -28,6 +28,7 @@ #include #include #include +#include #if PTHREAD_IN_LIBC # include @@ -237,7 +238,6 @@ _dl_count_modids (void) } -#ifdef SHARED void _dl_determine_tlsoffset (void) { @@ -446,7 +446,6 @@ _dl_determine_tlsoffset (void) /* The alignment requirement for the static TLS block. */ GLRO (dl_tls_static_align) = max_align; } -#endif /* SHARED */ static void * allocate_dtv (void *result) @@ -508,55 +507,20 @@ tcb_to_pointer_to_free_location (void *tcb) void * _dl_allocate_tls_storage (void) { - void *result; - size_t size = GLRO (dl_tls_static_size); + size_t size = _dl_tls_block_size_with_pre (); -#if TLS_DTV_AT_TP - /* Memory layout is: - [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ] - ^ This should be returned. */ - size += TLS_PRE_TCB_SIZE; -#endif - - /* Reserve space for the required alignment and the pointer to the - original allocation. 
*/ - size_t alignment = GLRO (dl_tls_static_align); - - /* Perform the allocation. */ + /* Perform the allocation. Reserve space for alignment and storage + of the pointer that will have to be freed. */ _dl_tls_allocate_begin (); - void *allocated = malloc (size + alignment + sizeof (void *)); + void *allocated = malloc (size + GLRO (dl_tls_static_align) + + sizeof (void *)); if (__glibc_unlikely (allocated == NULL)) { _dl_tls_allocate_end (); return NULL; } - /* Perform alignment and allocate the DTV. */ -#if TLS_TCB_AT_TP - /* The TCB follows the TLS blocks, which determine the alignment. - (TCB alignment requirements have been taken into account when - calculating GLRO (dl_tls_static_align).) */ - void *aligned = (void *) roundup ((uintptr_t) allocated, alignment); - result = aligned + size - TLS_TCB_SIZE; - - /* Clear the TCB data structure. We can't ask the caller (i.e. - libpthread) to do it, because we will initialize the DTV et al. */ - memset (result, '\0', TLS_TCB_SIZE); -#elif TLS_DTV_AT_TP - /* Pre-TCB and TCB come before the TLS blocks. The layout computed - in _dl_determine_tlsoffset assumes that the TCB is aligned to the - TLS block alignment, and not just the TLS blocks after it. This - can leave an unused alignment gap between the TCB and the TLS - blocks. */ - result = (void *) roundup - (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated, - alignment); - - /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before - it. We can't ask the caller (i.e. libpthread) to do it, because - we will initialize the DTV et al. */ - memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE); -#endif + void *result = _dl_tls_block_align (size, allocated); /* Record the value of the original pointer for later deallocation. 
*/ diff --git a/elf/dl-tls_block_align.h b/elf/dl-tls_block_align.h new file mode 100644 index 0000000000..82016f3bb5 --- /dev/null +++ b/elf/dl-tls_block_align.h @@ -0,0 +1,70 @@ +/* Computation of TLS block size and its alignment. + Copyright (C) 2002-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* Compute the size of the TLS block for memory allocation. */ +static size_t +_dl_tls_block_size_with_pre (void) +{ + size_t size = GLRO (dl_tls_static_size); + +#if TLS_DTV_AT_TP + /* Memory layout is: + [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ] + ^ This should be returned. */ + size += TLS_PRE_TCB_SIZE; +#endif + return size; +} + +/* SIZE must be the value returned by _dl_tls_block_size_with_pre. + ALLOCATED must point to at least SIZE + GLRO (dl_tls_static_align) + bytes. */ +static void * +_dl_tls_block_align (size_t size, void *allocated) +{ + void *result; + size_t alignment = GLRO (dl_tls_static_align); + +#if TLS_TCB_AT_TP + /* The TCB follows the TLS blocks, which determine the alignment. + (TCB alignment requirements have been taken into account when + calculating GLRO (dl_tls_static_align).) */ + void *aligned = (void *) roundup ((uintptr_t) allocated, alignment); + result = aligned + size - TLS_TCB_SIZE; + + /* Clear the TCB data structure. We can't ask the caller (i.e. 
+ libpthread) to do it, because we will initialize the DTV et al. */ + memset (result, '\0', TLS_TCB_SIZE); +#elif TLS_DTV_AT_TP + /* Pre-TCB and TCB come before the TLS blocks. The layout computed + in _dl_determine_tlsoffset assumes that the TCB is aligned to the + TLS block alignment, and not just the TLS blocks after it. This + can leave an unused alignment gap between the TCB and the TLS + blocks. */ + result = (void *) roundup + (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated, + alignment); + + /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before + it. We can't ask the caller (i.e. libpthread) to do it, because + we will initialize the DTV et al. */ + memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE); +#endif + + return result; +}