From bf0bcef3d4975ab964d573fecbe2a8b2960b8120 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 24 Apr 2017 00:23:03 -0700 Subject: [PATCH] output: generic string table implementation Several output formats use "string tables", which is a collection of null-terminated (C) strings which are referenced by a byte offset into the string table. A single string can be referenced an arbitrary number of times. As this is quite simple to implement with a hash table, we do exactly that. Signed-off-by: H. Peter Anvin --- Makefile.in | 6 +++ Mkfiles/msvc.mak | 6 +++ Mkfiles/openwcom.mak | 6 +++ output/strtbl.c | 111 +++++++++++++++++++++++++++++++++++++++++++ output/strtbl.h | 56 ++++++++++++++++++++++ 5 files changed, 185 insertions(+) create mode 100644 output/strtbl.c create mode 100644 output/strtbl.h diff --git a/Makefile.in b/Makefile.in index 71783af4..22d37698 100644 --- a/Makefile.in +++ b/Makefile.in @@ -128,6 +128,7 @@ LIBOBJ = stdlib/snprintf.$(O) stdlib/vsnprintf.$(O) stdlib/strlcpy.$(O) \ macros/macros.$(O) \ \ output/outform.$(O) output/outlib.$(O) output/legacy.$(O) \ + output/strtbl.$(O) \ output/nulldbg.$(O) output/nullout.$(O) \ output/outbin.$(O) output/outaout.$(O) output/outcoff.$(O) \ output/outelf.$(O) \ @@ -759,6 +760,11 @@ output/outrdf2.$(O): output/outrdf2.c asm/directiv.h asm/pptok.h \ include/nasmint.h include/nasmlib.h include/opflags.h include/perfhash.h \ include/rdoff.h include/saa.h include/strlist.h include/tables.h \ output/outform.h output/outlib.h x86/insnsi.h x86/regs.h +output/strtbl.$(O): output/strtbl.c asm/directiv.h asm/pptok.h asm/preproc.h \ + config/config.h config/msvc.h config/unknown.h config/watcom.h \ + include/compiler.h include/error.h include/hashtbl.h include/nasm.h \ + include/nasmint.h include/nasmlib.h include/opflags.h include/perfhash.h \ + include/strlist.h include/tables.h output/strtbl.h x86/insnsi.h x86/regs.h rdoff/collectn.$(O): rdoff/collectn.c config/config.h config/msvc.h \ config/unknown.h 
config/watcom.h include/compiler.h include/error.h \ include/nasmint.h include/nasmlib.h include/rdoff.h rdoff/collectn.h \ diff --git a/Mkfiles/msvc.mak b/Mkfiles/msvc.mak index b8b6d3ad..44aabb5e 100644 --- a/Mkfiles/msvc.mak +++ b/Mkfiles/msvc.mak @@ -101,6 +101,7 @@ LIBOBJ = stdlib\snprintf.$(O) stdlib\vsnprintf.$(O) stdlib\strlcpy.$(O) \ macros\macros.$(O) \ \ output\outform.$(O) output\outlib.$(O) output\legacy.$(O) \ + output\strtbl.$(O) \ output\nulldbg.$(O) output\nullout.$(O) \ output\outbin.$(O) output\outaout.$(O) output\outcoff.$(O) \ output\outelf.$(O) \ @@ -645,6 +646,11 @@ output\outrdf2.$(O): output\outrdf2.c asm\directiv.h asm\pptok.h \ include\nasmlib.h include\opflags.h include\perfhash.h include\rdoff.h \ include\saa.h include\strlist.h include\tables.h output\outform.h \ output\outlib.h x86\insnsi.h x86\regs.h +output\strtbl.$(O): output\strtbl.c asm\directiv.h asm\pptok.h asm\preproc.h \ + config\msvc.h config\unknown.h config\watcom.h include\compiler.h \ + include\error.h include\hashtbl.h include\nasm.h include\nasmint.h \ + include\nasmlib.h include\opflags.h include\perfhash.h include\strlist.h \ + include\tables.h output\strtbl.h x86\insnsi.h x86\regs.h rdoff\collectn.$(O): rdoff\collectn.c config\msvc.h config\unknown.h \ config\watcom.h include\compiler.h include\error.h include\nasmint.h \ include\nasmlib.h include\rdoff.h rdoff\collectn.h rdoff\rdfutils.h diff --git a/Mkfiles/openwcom.mak b/Mkfiles/openwcom.mak index 516c3f03..9e6ee470 100644 --- a/Mkfiles/openwcom.mak +++ b/Mkfiles/openwcom.mak @@ -87,6 +87,7 @@ LIBOBJ = stdlib/snprintf.$(O) stdlib/vsnprintf.$(O) stdlib/strlcpy.$(O) & macros/macros.$(O) & & output/outform.$(O) output/outlib.$(O) output/legacy.$(O) & + output/strtbl.$(O) & output/nulldbg.$(O) output/nullout.$(O) & output/outbin.$(O) output/outaout.$(O) output/outcoff.$(O) & output/outelf.$(O) & @@ -619,6 +620,11 @@ output/outrdf2.$(O): output/outrdf2.c asm/directiv.h asm/pptok.h & include/nasmlib.h include/opflags.h 
include/perfhash.h include/rdoff.h & include/saa.h include/strlist.h include/tables.h output/outform.h & output/outlib.h x86/insnsi.h x86/regs.h +output/strtbl.$(O): output/strtbl.c asm/directiv.h asm/pptok.h asm/preproc.h & + config/msvc.h config/unknown.h config/watcom.h include/compiler.h & + include/error.h include/hashtbl.h include/nasm.h include/nasmint.h & + include/nasmlib.h include/opflags.h include/perfhash.h include/strlist.h & + include/tables.h output/strtbl.h x86/insnsi.h x86/regs.h rdoff/collectn.$(O): rdoff/collectn.c config/msvc.h config/unknown.h & config/watcom.h include/compiler.h include/error.h include/nasmint.h & include/nasmlib.h include/rdoff.h rdoff/collectn.h rdoff/rdfutils.h diff --git a/output/strtbl.c b/output/strtbl.c new file mode 100644 index 00000000..94dd59b6 --- /dev/null +++ b/output/strtbl.c @@ -0,0 +1,111 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2017 The NASM Authors - All Rights Reserved + * See the file AUTHORS included with the NASM distribution for + * the specific copyright holders. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following + * conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ----------------------------------------------------------------------- */ + +/* + * Common string table handling + * + * A number of output formats use a "string table"; a container for + * a number of strings which may be reused at will. This implements + * a string table which eliminates duplicates and returns the index + * into the string table when queried. + */ + +#include "compiler.h" + +#include "nasm.h" +#include "nasmlib.h" +#include "error.h" +#include "strtbl.h" +/* One stored string: the value kept in the hash for each distinct string */ +struct strtbl_entry { + size_t index; /* byte offset of this string within the linearized table */ + size_t bytes; /* size of the string including its terminating NUL */ + char str[1]; /* string text; declared [1] but allocated with room for all bytes */ +}; +/* Set tbl up as an empty table, then add the empty string so that it gets index 0 */ +void strtbl_init(struct nasm_strtbl *tbl) +{ + tbl->size = 0; + hash_init(&tbl->hash, HASH_LARGE); + strtbl_find(tbl, "", true); /* Index 0 is always an empty string */ +} +/* nasm_free() every entry held in the hash, then hash_free() the hash; tbl->size is not modified */ +void strtbl_free(struct nasm_strtbl *tbl) +{ + struct hash_tbl_node *iter = NULL; + struct strtbl_entry *se; + + while ((se = hash_iterate(&tbl->hash, &iter, NULL))) + nasm_free(se); + + hash_free(&tbl->hash); +} +/* Return the index of str in tbl; if str is absent it is appended when add is true, otherwise STRTBL_NONE is returned */ +size_t strtbl_find(struct nasm_strtbl *tbl, const char *str, bool add) +{ + struct hash_insert hi; + void **dp; + struct strtbl_entry *se; + + dp = hash_find(&tbl->hash, str, &hi); /* hi is handed on to hash_add() when str is new */ + if (dp) { + se = *dp; /* already present: reuse the existing entry */ + } else if (add) { + size_t bytes = strlen(str) + 1; /* +1 for the terminating NUL */ + + se = nasm_malloc(sizeof(struct strtbl_entry)-1+bytes); /* -1 discounts the str[1] placeholder */ + se->index = tbl->size; /* new string starts at the current end of the table */ + tbl->size += bytes; + se->bytes = bytes; + memcpy(se->str, str, bytes); + + hash_add(&hi, se->str, se); /* the key is the copy stored in the entry, not the pointer passed in */ + } else { + return STRTBL_NONE; + } + + return
se->index; +} + +/* This creates a linearized buffer containing the actual string table; the result is obtained from nasm_malloc() and is strtbl_size(tbl) bytes long */ +void *strtbl_generate(const struct nasm_strtbl *tbl) +{ + char *buf = nasm_malloc(strtbl_size(tbl)); + struct hash_tbl_node *iter = NULL; + struct strtbl_entry *se; + + while ((se = hash_iterate(&tbl->hash, &iter, NULL))) + memcpy(buf + se->index, se->str, se->bytes); /* each entry is copied to its recorded offset, so iteration order does not matter */ + + return buf; +} diff --git a/output/strtbl.h b/output/strtbl.h new file mode 100644 index 00000000..52a897f5 --- /dev/null +++ b/output/strtbl.h @@ -0,0 +1,56 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2017 The NASM Authors - All Rights Reserved + * See the file AUTHORS included with the NASM distribution for + * the specific copyright holders. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following + * conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ----------------------------------------------------------------------- */ + +#ifndef NASM_STRTBL_H +#define NASM_STRTBL_H + +#include "compiler.h" +#include "hashtbl.h" +/* String table state; set up by strtbl_init() and torn down by strtbl_free() */ +struct nasm_strtbl { + size_t size; /* running total of bytes for all strings added, terminators included */ + struct hash_table hash; /* maps each stored string to its table entry */ +}; +/* Value strtbl_find() returns when the string is absent and add is false */ +#define STRTBL_NONE ((size_t)-1) + +void strtbl_init(struct nasm_strtbl *tbl); /* start an empty table; the empty string gets index 0 */ +void strtbl_free(struct nasm_strtbl *tbl); /* free all stored entries and the hash */ +size_t strtbl_find(struct nasm_strtbl *tbl, const char *str, bool add); /* index of str, adding it first if add is true, else STRTBL_NONE when absent */ +static inline size_t strtbl_size(const struct nasm_strtbl *tbl) /* current table size in bytes */ +{ + return tbl->size; +} +void * safe_alloc strtbl_generate(const struct nasm_strtbl *tbl); /* newly allocated buffer of strtbl_size(tbl) bytes holding every string at its index */ + +#endif /* NASM_STRTBL_H */