From dcea6a95d78370c8b4ac3c0033d9f15aaabf31bf Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Mon, 14 Oct 2019 13:35:21 +1030 Subject: [PATCH] qsort issues qsort isn't guaranteed to be a stable sort, that is, elements comparing equal according to the comparison function may be reordered relative to their original ordering. Of course sometimes you may not care, but even in those cases it is good to force some ordering (ie. not have the comparison function return 0) so that linker output is reproducible over different libc qsort implementations. One way to make qsort stable (which the glibc manual incorrectly says is the only way) is to augment the elements being sorted with a monotonic counter of some kind, and use that counter as the final arbiter of ordering in the comparison function. Another way is to set up an array of pointers into the array of elements, first pointer to first element, second pointer to second element and so so, and sort the pointer array rather than the element array. Final arbiter in the comparison function then is the pointer difference. This works well with, for example, the symbol pointers returned by _bfd_elf_canonicalize_symtab which point into a symbol array. This patch fixes a few places where sorting by symbol pointers is appropriate, and adds comments where qsort stability is a non-issue. * elf-strtab.c (strrevcmp): Comment. * merge.c (strrevcmp): Likewise. * elf64-ppc.c (compare_symbols): Correct final pointer comparison. Comment on why comparing pointers ensures a stable sort. * elflink.c (struct elf_symbol): Add void* to union. (elf_sort_elf_symbol): Ensure a stable sort with pointer comparison. (elf_sym_name_compare): Likewise. (bfd_elf_match_symbols_in_sections): Style fix. (elf_link_sort_cmp1): Comment. --- bfd/ChangeLog | 12 ++++++++++++ bfd/elf-strtab.c | 4 +++- bfd/elf64-ppc.c | 12 +++++++++++- bfd/elflink.c | 37 +++++++++++++++++++++++++++++++------ bfd/merge.c | 3 +++ 5 files changed, 60 insertions(+), 8 deletions(-) diff --git a/bfd/ChangeLog b/bfd/ChangeLog index a864b8beb34..8d4404ebf1b 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,15 @@ +2019-10-14 Alan Modra + + * elf-strtab.c (strrevcmp): Comment. + * merge.c (strrevcmp): Likewise. + * elf64-ppc.c (compare_symbols): Correct final pointer comparison. + Comment on why comparing pointers ensures a stable sort. + * elflink.c (struct elf_symbol): Add void* to union. + (elf_sort_elf_symbol): Ensure a stable sort with pointer comparison. + (elf_sym_name_compare): Likewise. + (bfd_elf_match_symbols_in_sections): Style fix. + (elf_link_sort_cmp1): Comment. + 2019-10-14 Alan Modra PR 24955 diff --git a/bfd/elf-strtab.c b/bfd/elf-strtab.c index cc1dcb3f50a..55e138a6319 100644 --- a/bfd/elf-strtab.c +++ b/bfd/elf-strtab.c @@ -334,7 +334,9 @@ _bfd_elf_strtab_emit (register bfd *abfd, struct elf_strtab_hash *tab) return TRUE; } -/* Compare two elf_strtab_hash_entry structures. Called via qsort. */ +/* Compare two elf_strtab_hash_entry structures. Called via qsort. + Won't ever return zero as all entries differ, so there is no issue + with qsort stability here. */ static int strrevcmp (const void *a, const void *b) diff --git a/bfd/elf64-ppc.c b/bfd/elf64-ppc.c index 32ed81d98e3..2731e78618d 100644 --- a/bfd/elf64-ppc.c +++ b/bfd/elf64-ppc.c @@ -2101,7 +2101,17 @@ compare_symbols (const void *ap, const void *bp) if ((a->flags & BSF_DYNAMIC) == 0 && (b->flags & BSF_DYNAMIC) != 0) return 1; - return a > b; + /* Finally, sort on where the symbol is in memory. The symbols will + be in at most two malloc'd blocks, one for static syms, one for + dynamic syms, and we distinguish the two blocks above by testing + BSF_DYNAMIC. Since we are sorting the symbol pointers which were + originally in the same order as the symbols (and we're not + sorting the symbols themselves), this ensures a stable sort. */ + if (a < b) + return -1; + if (a > b) + return 1; + return 0; } /* Search SYMS for a symbol of the given VALUE. */ diff --git a/bfd/elflink.c b/bfd/elflink.c index d0f70cb6f58..9d8dcff9a39 100644 --- a/bfd/elflink.c +++ b/bfd/elflink.c @@ -7862,6 +7862,7 @@ struct elf_symbol { Elf_Internal_Sym *isym; struct elf_symbuf_symbol *ssym; + void *p; } u; const char *name; }; @@ -7874,7 +7875,13 @@ elf_sort_elf_symbol (const void *arg1, const void *arg2) const Elf_Internal_Sym *s1 = *(const Elf_Internal_Sym **) arg1; const Elf_Internal_Sym *s2 = *(const Elf_Internal_Sym **) arg2; - return s1->st_shndx - s2->st_shndx; + if (s1->st_shndx != s2->st_shndx) + return s1->st_shndx > s2->st_shndx ? 1 : -1; + /* Final sort by the address of the sym in the symbuf ensures + a stable sort. */ + if (s1 != s2) + return s1 > s2 ? 1 : -1; + return 0; } static int @@ -7882,7 +7889,12 @@ elf_sym_name_compare (const void *arg1, const void *arg2) { const struct elf_symbol *s1 = (const struct elf_symbol *) arg1; const struct elf_symbol *s2 = (const struct elf_symbol *) arg2; - return strcmp (s1->name, s2->name); + int ret = strcmp (s1->name, s2->name); + if (ret != 0) + return ret; + if (s1->u.p != s2->u.p) + return s1->u.p > s2->u.p ? 1 : -1; + return 0; } static struct elf_symbuf_head * @@ -8005,8 +8017,10 @@ bfd_elf_match_symbols_in_sections (asection *sec1, asection *sec2, goto done; if (!info->reduce_memory_overheads) - elf_tdata (bfd1)->symbuf = ssymbuf1 - = elf_create_symbuf (symcount1, isymbuf1); + { + ssymbuf1 = elf_create_symbuf (symcount1, isymbuf1); + elf_tdata (bfd1)->symbuf = ssymbuf1; + } } if (ssymbuf1 == NULL || ssymbuf2 == NULL) @@ -8017,8 +8031,10 @@ bfd_elf_match_symbols_in_sections (asection *sec1, asection *sec2, goto done; if (ssymbuf1 != NULL && !info->reduce_memory_overheads) - elf_tdata (bfd2)->symbuf = ssymbuf2 - = elf_create_symbuf (symcount2, isymbuf2); + { + ssymbuf2 = elf_create_symbuf (symcount2, isymbuf2); + elf_tdata (bfd2)->symbuf = ssymbuf2; + } } if (ssymbuf1 != NULL && ssymbuf2 != NULL) @@ -9060,6 +9076,15 @@ struct elf_link_sort_rela Elf_Internal_Rela rela[1]; }; +/* qsort stability here and for cmp2 is only an issue if multiple + dynamic relocations are emitted at the same address. But targets + that apply a series of dynamic relocations each operating on the + result of the prior relocation can't use -z combreloc as + implemented anyway. Such schemes tend to be broken by sorting on + symbol index. That leaves dynamic NONE relocs as the only other + case where ld might emit multiple relocs at the same address, and + those are only emitted due to target bugs. */ + static int elf_link_sort_cmp1 (const void *A, const void *B) { diff --git a/bfd/merge.c b/bfd/merge.c index 632c8523903..11be669c2c3 100644 --- a/bfd/merge.c +++ b/bfd/merge.c @@ -555,6 +555,9 @@ error_return: return FALSE; } +/* qsort comparison function. Won't ever return zero as all entries + differ, so there is no issue with qsort stability here. */ + static int strrevcmp (const void *a, const void *b) {