mirror of
git://sourceware.org/git/glibc.git
synced 2024-12-03 04:01:43 +08:00
dfa3394a60
In qsort_r we allocate a buffer sized QSORT_STACK_SIZE (1024) on stack and we intend to use it if all elements can fit into it. But there is a typo:

    if (total_size < sizeof buf)
      buf = tmp;
    else
      /* allocate a buffer on heap and use it ... */

Here "buf" is a pointer, thus sizeof buf is just 4 or 8, instead of 1024. There is also a minor issue that we should use "<=" instead of "<".

This bug was detected while debugging some strange heap corruption running the Ruby-3.3.0 test suite (on an experimental Linux From Scratch build using Binutils-2.41.90 and Glibc trunk, and also Fedora Rawhide [1]). It seems Ruby is doing some wild "optimization" by jumping into somewhere in qsort_r instead of calling it normally, resulting in a double free of buf if we allocate it on heap. The issue can be reproduced deterministically with:

    LD_PRELOAD=/usr/lib/libc_malloc_debug.so MALLOC_CHECK_=3 \
    LD_LIBRARY_PATH=. ./ruby test/runner.rb test/ruby/test_enum.rb

in Ruby-3.3.0 tree after building it. This change would hide the issue for Ruby, but Ruby is likely still buggy (if using this "optimization" sorting larger arrays).

[1]: https://kojipkgs.fedoraproject.org/work/tasks/9729/111889729/build.log

Signed-off-by: Xi Ruoyao <xry111@xry111.site>
408 lines
9.9 KiB
C
408 lines
9.9 KiB
C
/* Copyright (C) 1991-2024 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<https://www.gnu.org/licenses/>. */
|
|
|
|
/* If you consider tuning this algorithm, you should consult first:
|
|
Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
|
|
Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
|
|
|
|
#include <errno.h>
|
|
#include <limits.h>
|
|
#include <memswap.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdbool.h>
|
|
|
|
/* Strategy used to move elements of the array being sorted.  The
   word-sized variants are provided alongside the generic byte-wise one
   purely as an optimization.  */
enum swap_type_t
{
  /* Each element is exactly one suitably aligned uint64_t.  */
  SWAP_WORDS_64,
  /* Each element is exactly one suitably aligned uint32_t.  */
  SWAP_WORDS_32,
  /* Each element is a pointer to the real object (indirect sort); the
     comparison function is called on the pointed-to objects.  */
  SWAP_VOID_ARG,
  /* Any other element: handled as a run of raw bytes.  */
  SWAP_BYTES
};
|
|
|
|
/* Word types carrying the may_alias attribute, so the swap and merge
   helpers can read and write element storage of any type through
   them.  */
typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t;
typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t;
|
|
|
|
static inline void
|
|
swap_words_64 (void * restrict a, void * restrict b, size_t n)
|
|
{
|
|
do
|
|
{
|
|
n -= 8;
|
|
u64_alias_t t = *(u64_alias_t *)(a + n);
|
|
*(u64_alias_t *)(a + n) = *(u64_alias_t *)(b + n);
|
|
*(u64_alias_t *)(b + n) = t;
|
|
} while (n);
|
|
}
|
|
|
|
static inline void
|
|
swap_words_32 (void * restrict a, void * restrict b, size_t n)
|
|
{
|
|
do
|
|
{
|
|
n -= 4;
|
|
u32_alias_t t = *(u32_alias_t *)(a + n);
|
|
*(u32_alias_t *)(a + n) = *(u32_alias_t *)(b + n);
|
|
*(u32_alias_t *)(b + n) = t;
|
|
} while (n);
|
|
}
|
|
|
|
/* Replace the indirect call with a serie of if statements. It should help
|
|
the branch predictor. */
|
|
static void
|
|
do_swap (void * restrict a, void * restrict b, size_t size,
|
|
enum swap_type_t swap_type)
|
|
{
|
|
if (swap_type == SWAP_WORDS_64)
|
|
swap_words_64 (a, b, size);
|
|
else if (swap_type == SWAP_WORDS_32)
|
|
swap_words_32 (a, b, size);
|
|
else
|
|
__memswap (a, b, size);
|
|
}
|
|
|
|
/* Establish the heap condition at index K, that is, the key at K will
|
|
not be less than either of its children, at 2 * K + 1 and 2 * K + 2
|
|
(if they exist). N is the last valid index. */
|
|
static inline void
|
|
siftdown (void *base, size_t size, size_t k, size_t n,
|
|
enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg)
|
|
{
|
|
/* There can only be a heap condition violation if there are
|
|
children. */
|
|
while (2 * k + 1 <= n)
|
|
{
|
|
/* Left child. */
|
|
size_t j = 2 * k + 1;
|
|
/* If the right child is larger, use it. */
|
|
if (j < n && cmp (base + (j * size), base + ((j + 1) * size), arg) < 0)
|
|
j++;
|
|
|
|
/* If k is already >= to its children, we are done. */
|
|
if (j == k || cmp (base + (k * size), base + (j * size), arg) >= 0)
|
|
break;
|
|
|
|
/* Heal the violation. */
|
|
do_swap (base + (size * j), base + (k * size), size, swap_type);
|
|
|
|
/* Swapping with j may have introduced a violation at j. Fix
|
|
it in the next loop iteration. */
|
|
k = j;
|
|
}
|
|
}
|
|
|
|
/* Establish the heap condition for the indices 0 to N (inclusive). */
|
|
static inline void
|
|
heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type,
|
|
__compar_d_fn_t cmp, void *arg)
|
|
{
|
|
/* If n is odd, k = n / 2 has a left child at n, so this is the
|
|
largest index that can have a heap condition violation regarding
|
|
its children. */
|
|
size_t k = n / 2;
|
|
while (1)
|
|
{
|
|
siftdown (base, size, k, n, swap_type, cmp, arg);
|
|
if (k-- == 0)
|
|
break;
|
|
}
|
|
}
|
|
|
|
static enum swap_type_t
|
|
get_swap_type (void *const pbase, size_t size)
|
|
{
|
|
if ((size & (sizeof (uint32_t) - 1)) == 0
|
|
&& ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0)
|
|
{
|
|
if (size == sizeof (uint32_t))
|
|
return SWAP_WORDS_32;
|
|
else if (size == sizeof (uint64_t)
|
|
&& ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0)
|
|
return SWAP_WORDS_64;
|
|
}
|
|
return SWAP_BYTES;
|
|
}
|
|
|
|
|
|
/* A non-recursive heapsort with worst-case performance of O(nlog n) and
|
|
worst-case space complexity of O(1). It sorts the array starting at
|
|
BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback
|
|
function used to swap elements, and CMP is the function used to compare
|
|
elements. */
|
|
static void
|
|
heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg)
|
|
{
|
|
if (n == 0)
|
|
return;
|
|
|
|
enum swap_type_t swap_type = get_swap_type (base, size);
|
|
|
|
/* Build the binary heap, largest value at the base[0]. */
|
|
heapify (base, size, n, swap_type, cmp, arg);
|
|
|
|
while (true)
|
|
{
|
|
/* Indices 0 .. n contain the binary heap. Extract the largest
|
|
element put it into the final position in the array. */
|
|
do_swap (base, base + (n * size), size, swap_type);
|
|
|
|
/* The heap is now one element shorter. */
|
|
n--;
|
|
if (n == 0)
|
|
break;
|
|
|
|
/* By swapping in elements 0 and the previous value of n (now at
|
|
n + 1), we likely introduced a heap condition violation. Fix
|
|
it for the reduced heap. */
|
|
siftdown (base, size, 0, n, swap_type, cmp, arg);
|
|
}
|
|
}
|
|
|
|
/* Size in bytes of the scratch buffer that the merge sort gets from the
   stack.  Sorts needing more temporary storage than this allocate it on
   the heap instead.  */
#define QSORT_STACK_SIZE 1024
|
|
|
|
/* Elements larger than this many bytes are sorted indirectly, through
   an array of pointers to them, to minimize the amount of element data
   that has to be copied around.  */
#define INDIRECT_SORT_SIZE_THRES 32
|
|
|
|
struct msort_param
|
|
{
|
|
size_t s;
|
|
enum swap_type_t var;
|
|
__compar_d_fn_t cmp;
|
|
void *arg;
|
|
char *t;
|
|
};
|
|
|
|
static void
|
|
msort_with_tmp (const struct msort_param *p, void *b, size_t n)
|
|
{
|
|
char *b1, *b2;
|
|
size_t n1, n2;
|
|
|
|
if (n <= 1)
|
|
return;
|
|
|
|
n1 = n / 2;
|
|
n2 = n - n1;
|
|
b1 = b;
|
|
b2 = (char *) b + (n1 * p->s);
|
|
|
|
msort_with_tmp (p, b1, n1);
|
|
msort_with_tmp (p, b2, n2);
|
|
|
|
char *tmp = p->t;
|
|
const size_t s = p->s;
|
|
__compar_d_fn_t cmp = p->cmp;
|
|
void *arg = p->arg;
|
|
switch (p->var)
|
|
{
|
|
case SWAP_WORDS_32:
|
|
while (n1 > 0 && n2 > 0)
|
|
{
|
|
if (cmp (b1, b2, arg) <= 0)
|
|
{
|
|
*(u32_alias_t *) tmp = *(u32_alias_t *) b1;
|
|
b1 += sizeof (u32_alias_t);
|
|
--n1;
|
|
}
|
|
else
|
|
{
|
|
*(u32_alias_t *) tmp = *(u32_alias_t *) b2;
|
|
b2 += sizeof (u32_alias_t);
|
|
--n2;
|
|
}
|
|
tmp += sizeof (u32_alias_t);
|
|
}
|
|
break;
|
|
case SWAP_WORDS_64:
|
|
while (n1 > 0 && n2 > 0)
|
|
{
|
|
if (cmp (b1, b2, arg) <= 0)
|
|
{
|
|
*(u64_alias_t *) tmp = *(u64_alias_t *) b1;
|
|
b1 += sizeof (u64_alias_t);
|
|
--n1;
|
|
}
|
|
else
|
|
{
|
|
*(u64_alias_t *) tmp = *(u64_alias_t *) b2;
|
|
b2 += sizeof (u64_alias_t);
|
|
--n2;
|
|
}
|
|
tmp += sizeof (u64_alias_t);
|
|
}
|
|
break;
|
|
case SWAP_VOID_ARG:
|
|
while (n1 > 0 && n2 > 0)
|
|
{
|
|
if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0)
|
|
{
|
|
*(void **) tmp = *(void **) b1;
|
|
b1 += sizeof (void *);
|
|
--n1;
|
|
}
|
|
else
|
|
{
|
|
*(void **) tmp = *(void **) b2;
|
|
b2 += sizeof (void *);
|
|
--n2;
|
|
}
|
|
tmp += sizeof (void *);
|
|
}
|
|
break;
|
|
default:
|
|
while (n1 > 0 && n2 > 0)
|
|
{
|
|
if (cmp (b1, b2, arg) <= 0)
|
|
{
|
|
tmp = (char *) __mempcpy (tmp, b1, s);
|
|
b1 += s;
|
|
--n1;
|
|
}
|
|
else
|
|
{
|
|
tmp = (char *) __mempcpy (tmp, b2, s);
|
|
b2 += s;
|
|
--n2;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (n1 > 0)
|
|
memcpy (tmp, b1, n1 * s);
|
|
memcpy (b, p->t, (n - n2) * s);
|
|
}
|
|
|
|
static void
|
|
__attribute_used__
|
|
indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n,
|
|
size_t s)
|
|
{
|
|
/* Indirect sorting. */
|
|
char *ip = (char *) b;
|
|
void **tp = (void **) (p->t + n * sizeof (void *));
|
|
void **t = tp;
|
|
void *tmp_storage = (void *) (tp + n);
|
|
|
|
while ((void *) t < tmp_storage)
|
|
{
|
|
*t++ = ip;
|
|
ip += s;
|
|
}
|
|
msort_with_tmp (p, p->t + n * sizeof (void *), n);
|
|
|
|
/* tp[0] .. tp[n - 1] is now sorted, copy around entries of
|
|
the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
|
|
char *kp;
|
|
size_t i;
|
|
for (i = 0, ip = (char *) b; i < n; i++, ip += s)
|
|
if ((kp = tp[i]) != ip)
|
|
{
|
|
size_t j = i;
|
|
char *jp = ip;
|
|
memcpy (tmp_storage, ip, s);
|
|
|
|
do
|
|
{
|
|
size_t k = (kp - (char *) b) / s;
|
|
tp[j] = jp;
|
|
memcpy (jp, kp, s);
|
|
j = k;
|
|
jp = kp;
|
|
kp = tp[k];
|
|
}
|
|
while (kp != ip);
|
|
|
|
tp[j] = jp;
|
|
memcpy (jp, tmp_storage, s);
|
|
}
|
|
}
|
|
|
|
void
|
|
__qsort_r (void *const pbase, size_t total_elems, size_t size,
|
|
__compar_d_fn_t cmp, void *arg)
|
|
{
|
|
if (total_elems <= 1)
|
|
return;
|
|
|
|
/* Align to the maximum size used by the swap optimization. */
|
|
_Alignas (uint64_t) char tmp[QSORT_STACK_SIZE];
|
|
size_t total_size = total_elems * size;
|
|
char *buf;
|
|
|
|
if (size > INDIRECT_SORT_SIZE_THRES)
|
|
total_size = 2 * total_elems * sizeof (void *) + size;
|
|
|
|
if (total_size <= sizeof tmp)
|
|
buf = tmp;
|
|
else
|
|
{
|
|
int save = errno;
|
|
buf = malloc (total_size);
|
|
__set_errno (save);
|
|
if (buf == NULL)
|
|
{
|
|
/* Fallback to heapsort in case of memory failure. */
|
|
heapsort_r (pbase, total_elems - 1, size, cmp, arg);
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (size > INDIRECT_SORT_SIZE_THRES)
|
|
{
|
|
const struct msort_param msort_param =
|
|
{
|
|
.s = sizeof (void *),
|
|
.cmp = cmp,
|
|
.arg = arg,
|
|
.var = SWAP_VOID_ARG,
|
|
.t = buf,
|
|
};
|
|
indirect_msort_with_tmp (&msort_param, pbase, total_elems, size);
|
|
}
|
|
else
|
|
{
|
|
const struct msort_param msort_param =
|
|
{
|
|
.s = size,
|
|
.cmp = cmp,
|
|
.arg = arg,
|
|
.var = get_swap_type (pbase, size),
|
|
.t = buf,
|
|
};
|
|
msort_with_tmp (&msort_param, pbase, total_elems);
|
|
}
|
|
|
|
if (buf != tmp)
|
|
free (buf);
|
|
}
|
|
libc_hidden_def (__qsort_r)
|
|
weak_alias (__qsort_r, qsort_r)
|
|
|
|
void
|
|
qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
|
|
{
|
|
return __qsort_r (b, n, s, (__compar_d_fn_t) cmp, NULL);
|
|
}
|
|
libc_hidden_def (qsort)
|