elf: Implement a string table for ldconfig, with tail merging

This will be used in ldconfig to reduce the ld.so.cache size slightly.

Tail merging is an optimization where a pointer points into another
string if the first string is a suffix of the second string.

The hash function FNV-1a was chosen because it is simple and achieves
good dispersion even for short strings (so that the hash table bucket
count can be a power of two).  It is clearly superior to the hsearch
hash and the ELF hash in this regard.

The hash table uses chaining for collision resolution.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
This commit is contained in:
Florian Weimer 2020-12-04 09:13:43 +01:00
parent dfb3f101c5
commit 785969a047
5 changed files with 488 additions and 1 deletions

View File

@ -174,7 +174,7 @@ tests-container := \
tests := tst-tls9 tst-leaks1 \
tst-array1 tst-array2 tst-array3 tst-array4 tst-array5 \
tst-auxv
tst-auxv tst-stringtable
tests-internal := tst-tls1 tst-tls2 $(tests-static-internal)
tests-static := $(tests-static-normal) $(tests-static-internal)

209
elf/stringtable.c Normal file
View File

@ -0,0 +1,209 @@
/* String tables for ld.so.cache construction. Implementation.
Copyright (C) 2020 Free Software Foundation, Inc.
This file is part of the GNU C Library.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
by the Free Software Foundation; version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <https://www.gnu.org/licenses/>. */
#include <assert.h>
#include <error.h>
#include <ldconfig.h>
#include <libintl.h>
#include <stdlib.h>
#include <string.h>
#include <stringtable.h>
static void
stringtable_init (struct stringtable *table)
{
table->count = 0;
/* This needs to be a power of two. 128 is sufficient to keep track
of 42 DSOs without resizing (assuming two strings per DSOs).
glibc itself comes with more than 20 DSOs, so 64 would likely to
be too small. */
table->allocated = 128;
table->entries = xcalloc (table->allocated, sizeof (table->entries[0]));
}
/* 32-bit FNV-1a hash function. */
static uint32_t
fnv1a (const char *string, size_t length)
{
const unsigned char *p = (const unsigned char *) string;
uint32_t hash = 2166136261U;
for (size_t i = 0; i < length; ++i)
{
hash ^= p[i];
hash *= 16777619U;
}
return hash;
}
/* Double the capacity of the hash table. */
static void
stringtable_rehash (struct stringtable *table)
{
/* This computation cannot overflow because the old total in-memory
size of the hash table is larger than the computed value. */
uint32_t new_allocated = table->allocated * 2;
struct stringtable_entry **new_entries
= xcalloc (new_allocated, sizeof (table->entries[0]));
uint32_t mask = new_allocated - 1;
for (uint32_t i = 0; i < table->allocated; ++i)
for (struct stringtable_entry *e = table->entries[i]; e != NULL; )
{
struct stringtable_entry *next = e->next;
uint32_t hash = fnv1a (e->string, e->length);
uint32_t new_index = hash & mask;
e->next = new_entries[new_index];
new_entries[new_index] = e;
e = next;
}
free (table->entries);
table->entries = new_entries;
table->allocated = new_allocated;
}
struct stringtable_entry *
stringtable_add (struct stringtable *table, const char *string)
{
/* Check for a zero-initialized table. */
if (table->allocated == 0)
stringtable_init (table);
size_t length = strlen (string);
if (length > (1U << 30))
error (EXIT_FAILURE, 0, _("String table string is too long"));
uint32_t hash = fnv1a (string, length);
/* Return a previously-existing entry. */
for (struct stringtable_entry *e
= table->entries[hash & (table->allocated - 1)];
e != NULL; e = e->next)
if (e->length == length && memcmp (e->string, string, length) == 0)
return e;
/* Increase the size of the table if necessary. Keep utilization
below two thirds. */
if (table->count >= (1U << 30))
error (EXIT_FAILURE, 0, _("String table has too many entries"));
if (table->count * 3 > table->allocated * 2)
stringtable_rehash (table);
/* Add the new table entry. */
++table->count;
struct stringtable_entry *e
= xmalloc (offsetof (struct stringtable_entry, string) + length + 1);
uint32_t index = hash & (table->allocated - 1);
e->next = table->entries[index];
table->entries[index] = e;
e->length = length;
e->offset = 0;
memcpy (e->string, string, length + 1);
return e;
}
/* Sort reversed strings in reverse lexicographic order. This is used
for tail merging. */
static int
finalize_compare (const void *l, const void *r)
{
struct stringtable_entry *left = *(struct stringtable_entry **) l;
struct stringtable_entry *right = *(struct stringtable_entry **) r;
size_t to_compare;
if (left->length < right->length)
to_compare = left->length;
else
to_compare = right->length;
for (size_t i = 1; i <= to_compare; ++i)
{
unsigned char lch = left->string[left->length - i];
unsigned char rch = right->string[right->length - i];
if (lch != rch)
return rch - lch;
}
if (left->length == right->length)
return 0;
else if (left->length < right->length)
/* Longer strings should come first. */
return 1;
else
return -1;
}
void
stringtable_finalize (struct stringtable *table,
struct stringtable_finalized *result)
{
if (table->count == 0)
{
result->strings = xstrdup ("");
result->size = 0;
return;
}
/* Optimize the order of the strings. */
struct stringtable_entry **array = xcalloc (table->count, sizeof (*array));
{
size_t j = 0;
for (uint32_t i = 0; i < table->allocated; ++i)
for (struct stringtable_entry *e = table->entries[i]; e != NULL;
e = e->next)
{
array[j] = e;
++j;
}
assert (j == table->count);
}
qsort (array, table->count, sizeof (*array), finalize_compare);
/* Assign offsets, using tail merging (sharing suffixes) if possible. */
array[0]->offset = 0;
for (uint32_t j = 1; j < table->count; ++j)
{
struct stringtable_entry *previous = array[j - 1];
struct stringtable_entry *current = array[j];
if (previous->length >= current->length
&& memcmp (&previous->string[previous->length - current->length],
current->string, current->length) == 0)
current->offset = (previous->offset + previous->length
- current->length);
else if (__builtin_add_overflow (previous->offset,
previous->length + 1,
&current->offset))
error (EXIT_FAILURE, 0, _("String table is too large"));
}
/* Allocate the result string. */
{
struct stringtable_entry *last = array[table->count - 1];
if (__builtin_add_overflow (last->offset, last->length + 1,
&result->size))
error (EXIT_FAILURE, 0, _("String table is too large"));
}
/* The strings are copied from the hash table, so the array is no
longer needed. */
free (array);
result->strings = xcalloc (result->size, 1);
/* Copy the strings. */
for (uint32_t i = 0; i < table->allocated; ++i)
for (struct stringtable_entry *e = table->entries[i]; e != NULL;
e = e->next)
if (result->strings[e->offset] == '\0')
memcpy (&result->strings[e->offset], e->string, e->length + 1);
}

64
elf/stringtable.h Normal file
View File

@ -0,0 +1,64 @@
/* String tables for ld.so.cache construction.
Copyright (C) 2020 Free Software Foundation, Inc.
This file is part of the GNU C Library.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
by the Free Software Foundation; version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <https://www.gnu.org/licenses/>. */
#ifndef _STRINGTABLE_H
#define _STRINGTABLE_H
#include <stddef.h>
#include <stdint.h>
/* An entry in the string table. Only the length and string fields are
expected to be used outside the string table code. */
struct stringtable_entry
{
struct stringtable_entry *next; /* For collision resolution. */
uint32_t length; /* Length of then string. */
uint32_t offset; /* From start of finalized table. */
char string[]; /* Null-terminated string. */
};
/* A string table. Zero-initialization produces a valid atable. */
struct stringtable
{
struct stringtable_entry **entries; /* Array of hash table buckets. */
uint32_t count; /* Number of elements in the table. */
uint32_t allocated; /* Length of the entries array. */
};
/* Adds STRING to TABLE. May return the address of an existing entry. */
struct stringtable_entry *stringtable_add (struct stringtable *table,
const char *string);
/* Result of stringtable_finalize. SIZE bytes at STRINGS should be
written to the file. */
struct stringtable_finalized
{
char *strings;
size_t size;
};
/* Assigns offsets to string table entries and computes the serialized
form of the string table. */
void stringtable_finalize (struct stringtable *table,
struct stringtable_finalized *result);
/* Deallocate the string table (but not the TABLE pointer itself).
(The table can be re-used for adding more strings without
initialization.) */
void stringtable_free (struct stringtable *table);
#endif /* _STRINGTABLE_H */

33
elf/stringtable_free.c Normal file
View File

@ -0,0 +1,33 @@
/* String tables for ld.so.cache construction. Deallocation (for tests only).
Copyright (C) 2020 Free Software Foundation, Inc.
This file is part of the GNU C Library.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
by the Free Software Foundation; version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <https://www.gnu.org/licenses/>. */
#include <stdlib.h>
#include <stringtable.h>
void
stringtable_free (struct stringtable *table)
{
for (uint32_t i = 0; i < table->allocated; ++i)
for (struct stringtable_entry *e = table->entries[i]; e != NULL; )
{
struct stringtable_entry *next = e->next;
free (e);
e = next;
}
free (table->entries);
*table = (struct stringtable) { 0, };
}

181
elf/tst-stringtable.c Normal file
View File

@ -0,0 +1,181 @@
/* Unit test for ldconfig string tables.
Copyright (C) 2020 Free Software Foundation, Inc.
This file is part of the GNU C Library.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
by the Free Software Foundation; version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <https://www.gnu.org/licenses/>. */
#include <array_length.h>
#include <stdlib.h>
#include <string.h>
#include <stringtable.h>
#include <support/check.h>
#include <support/support.h>
static int
do_test (void)
{
/* Empty string table. */
{
struct stringtable s = { 0, };
struct stringtable_finalized f;
stringtable_finalize (&s, &f);
TEST_COMPARE_STRING (f.strings, "");
TEST_COMPARE (f.size, 0);
free (f.strings);
stringtable_free (&s);
}
/* String table with one empty string. */
{
struct stringtable s = { 0, };
struct stringtable_entry *e = stringtable_add (&s, "");
TEST_COMPARE_STRING (e->string, "");
TEST_COMPARE (e->length, 0);
TEST_COMPARE (s.count, 1);
struct stringtable_finalized f;
stringtable_finalize (&s, &f);
TEST_COMPARE (e->offset, 0);
TEST_COMPARE_STRING (f.strings, "");
TEST_COMPARE (f.size, 1);
free (f.strings);
stringtable_free (&s);
}
/* String table with one non-empty string. */
{
struct stringtable s = { 0, };
struct stringtable_entry *e = stringtable_add (&s, "name");
TEST_COMPARE_STRING (e->string, "name");
TEST_COMPARE (e->length, 4);
TEST_COMPARE (s.count, 1);
struct stringtable_finalized f;
stringtable_finalize (&s, &f);
TEST_COMPARE (e->offset, 0);
TEST_COMPARE_STRING (f.strings, "name");
TEST_COMPARE (f.size, 5);
free (f.strings);
stringtable_free (&s);
}
/* Two strings, one is a prefix of the other. Tail-merging can only
happen in one way in this case. */
{
struct stringtable s = { 0, };
struct stringtable_entry *suffix = stringtable_add (&s, "suffix");
TEST_COMPARE_STRING (suffix->string, "suffix");
TEST_COMPARE (suffix->length, 6);
TEST_COMPARE (s.count, 1);
struct stringtable_entry *prefix
= stringtable_add (&s, "prefix-suffix");
TEST_COMPARE_STRING (prefix->string, "prefix-suffix");
TEST_COMPARE (prefix->length, strlen ("prefix-suffix"));
TEST_COMPARE (s.count, 2);
struct stringtable_finalized f;
stringtable_finalize (&s, &f);
TEST_COMPARE (prefix->offset, 0);
TEST_COMPARE (suffix->offset, strlen ("prefix-"));
TEST_COMPARE_STRING (f.strings, "prefix-suffix");
TEST_COMPARE (f.size, sizeof ("prefix-suffix"));
free (f.strings);
stringtable_free (&s);
}
/* String table with various shared prefixes. Triggers hash
resizing. */
{
enum { count = 1500 };
char *strings[2 * count];
struct stringtable_entry *entries[2 * count];
struct stringtable s = { 0, };
for (int i = 0; i < count; ++i)
{
strings[i] = xasprintf ("%d", i);
entries[i] = stringtable_add (&s, strings[i]);
TEST_COMPARE (entries[i]->length, strlen (strings[i]));
TEST_COMPARE_STRING (entries[i]->string, strings[i]);
strings[i + count] = xasprintf ("prefix/%d", i);
entries[i + count] = stringtable_add (&s, strings[i + count]);
TEST_COMPARE (entries[i + count]->length, strlen (strings[i + count]));
TEST_COMPARE_STRING (entries[i + count]->string, strings[i + count]);
}
struct stringtable_finalized f;
stringtable_finalize (&s, &f);
for (int i = 0; i < 2 * count; ++i)
{
TEST_COMPARE (entries[i]->length, strlen (strings[i]));
TEST_COMPARE_STRING (entries[i]->string, strings[i]);
TEST_COMPARE_STRING (f.strings + entries[i]->offset, strings[i]);
free (strings[i]);
}
free (f.strings);
stringtable_free (&s);
}
/* Verify that maximum tail merging happens. */
{
struct stringtable s = { 0, };
const char *strings[] = {
"",
"a",
"b",
"aa",
"aaa",
"aa",
"bb",
"b",
"a",
"ba",
"baa",
};
struct stringtable_entry *entries[array_length (strings)];
for (int i = 0; i < array_length (strings); ++i)
entries[i] = stringtable_add (&s, strings[i]);
for (int i = 0; i < array_length (strings); ++i)
TEST_COMPARE_STRING (entries[i]->string, strings[i]);
struct stringtable_finalized f;
stringtable_finalize (&s, &f);
/* There are only four different strings, "aaa", "ba", "baa",
"bb". The rest is shared in an unspecified fashion. */
TEST_COMPARE (f.size, 4 + 3 + 4 + 3);
for (int i = 0; i < array_length (strings); ++i)
{
TEST_COMPARE_STRING (entries[i]->string, strings[i]);
TEST_COMPARE_STRING (f.strings + entries[i]->offset, strings[i]);
}
free (f.strings);
stringtable_free (&s);
}
return 0;
}
#include <support/test-driver.c>
/* Re-compile the string table implementation here. It is not
possible to link against the actual build because it was built for
use in ldconfig. */
#define _(arg) arg
#include "stringtable.c"
#include "stringtable_free.c"