From 36c771b1794c43193a73201dcea51827f2ef8273 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Mon, 15 Jul 2024 19:55:40 +0100 Subject: [PATCH] libctf: fix CTF dict compression Commit 483546ce4f3 ("libctf: make ctf_serialize() actually serialize") accidentally broke dict compression. There were two bugs: - ctf_arc_write_one_ctf was still making its own decision about whether to compress the dict via direct ctf_size comparison, which is unfortunate because now that it no longer calls ctf_serialize itself, ctf_size is always zero when it does this: it should let the writing functions decide on the threshold, which they contain code to do which is simply not used for lack of one trivial wrapper to write to an fd and also provide a compression threshold - ctf_write_mem, the function underlying all writing as of the commit above, was calling zlib's compressBound and avoiding compression if this returned a value larger than the input. Unfortunately compressBound does not do a trial compression and determine whether the result is compressible: it just adds zlib header sizes to the value passed in, so our test would *always* have concluded that the value was incompressible! Avoid by simply always compressing if the raw size is larger than the threshold: zlib is quite clever enough to avoid actually compressing if the data is incompressible. Add a testcase for this. libctf/ * ctf-impl.h (ctf_write_thresholded): New... * ctf-serialize.c (ctf_write_thresholded): ... defined here, a wrapper around... (ctf_write_mem): ... this. Don't check compressibility. (ctf_compress_write): Reimplement as a ctf_write_thresholded wrapper. (ctf_write): Likewise. * ctf-archive.c (arc_write_one_ctf): Just call ctf_write_thresholded rather than trying to work out whether to compress. * testsuite/libctf-writable/ctf-compressed.*: New test. --- libctf/ctf-archive.c | 8 +- libctf/ctf-impl.h | 2 + libctf/ctf-serialize.c | 51 ++---- .../libctf-writable/ctf-compressed.c | 158 ++++++++++++++++++ .../libctf-writable/ctf-compressed.lk | 2 + 5 files changed, 179 insertions(+), 42 deletions(-) create mode 100644 libctf/testsuite/libctf-writable/ctf-compressed.c create mode 100644 libctf/testsuite/libctf-writable/ctf-compressed.lk diff --git a/libctf/ctf-archive.c b/libctf/ctf-archive.c index c602705b310..0034bf0982a 100644 --- a/libctf/ctf-archive.c +++ b/libctf/ctf-archive.c @@ -265,16 +265,10 @@ arc_write_one_ctf (ctf_dict_t *f, int fd, size_t threshold) uint64_t ctfsz = 0; char *ctfszp; size_t ctfsz_len; - int (*writefn) (ctf_dict_t * fp, int fd); if ((off = lseek (fd, 0, SEEK_CUR)) < 0) return errno * -1; - if (f->ctf_size > threshold) - writefn = ctf_compress_write; - else - writefn = ctf_write; - /* This zero-write turns into the size in a moment. */ ctfsz_len = sizeof (ctfsz); ctfszp = (char *) &ctfsz; @@ -287,7 +281,7 @@ arc_write_one_ctf (ctf_dict_t *f, int fd, size_t threshold) ctfszp += writelen; } - if (writefn (f, fd) != 0) + if (ctf_write_thresholded (f, fd, threshold) != 0) return f->ctf_errno * -1; if ((end_off = lseek (fd, 0, SEEK_CUR)) < 0) diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h index b17a2d8699d..0d88e639884 100644 --- a/libctf/ctf-impl.h +++ b/libctf/ctf-impl.h @@ -762,6 +762,8 @@ extern int ctf_flip (ctf_dict_t *, ctf_header_t *, unsigned char *, int); extern int ctf_import_unref (ctf_dict_t *fp, ctf_dict_t *pfp); +extern int ctf_write_thresholded (ctf_dict_t *fp, int fd, size_t threshold); + _libctf_malloc_ extern void *ctf_mmap (size_t length, size_t offset, int fd); extern void ctf_munmap (void *, size_t); diff --git a/libctf/ctf-serialize.c b/libctf/ctf-serialize.c index 2d1f014db28..9efdb16e0d6 100644 --- a/libctf/ctf-serialize.c +++ b/libctf/ctf-serialize.c @@ -1180,10 +1180,10 @@ ctf_write_mem (ctf_dict_t *fp, size_t *size, size_t threshold) alloc_len = compressBound (rawbufsiz - sizeof (ctf_header_t)) + sizeof (ctf_header_t); - /* Trivial operation if the buffer is incompressible or too small to bother - compressing, and we're not doing a forced write-time flip. */ + /* Trivial operation if the buffer is too small to bother compressing, and + we're not doing a forced write-time flip. */ - if (rawbufsiz < threshold || rawbufsiz < alloc_len) + if (rawbufsiz < threshold) { alloc_len = rawbufsiz; uncompressed = 1; @@ -1248,10 +1248,10 @@ err: return NULL; } -/* Compress the specified CTF data stream and write it to the specified file - descriptor. */ +/* Write the compressed CTF data stream to the specified file descriptor, + possibly compressed. Internal only (for now). */ int -ctf_compress_write (ctf_dict_t *fp, int fd) +ctf_write_thresholded (ctf_dict_t *fp, int fd, size_t threshold) { unsigned char *buf; unsigned char *bp; @@ -1260,7 +1260,7 @@ ctf_compress_write (ctf_dict_t *fp, int fd) ssize_t len; int err = 0; - if ((buf = ctf_write_mem (fp, &tmp, 0)) == NULL) + if ((buf = ctf_write_mem (fp, &tmp, threshold)) == NULL) return -1; /* errno is set for us. */ buf_len = tmp; @@ -1283,36 +1283,17 @@ ret: return err; } +/* Compress the specified CTF data stream and write it to the specified file + descriptor. */ +int +ctf_compress_write (ctf_dict_t *fp, int fd) +{ + return ctf_write_thresholded (fp, fd, 0); +} + /* Write the uncompressed CTF data stream to the specified file descriptor. */ int ctf_write (ctf_dict_t *fp, int fd) { - unsigned char *buf; - unsigned char *bp; - size_t tmp; - ssize_t buf_len; - ssize_t len; - int err = 0; - - if ((buf = ctf_write_mem (fp, &tmp, (size_t) -1)) == NULL) - return -1; /* errno is set for us. */ - - buf_len = tmp; - bp = buf; - - while (buf_len > 0) - { - if ((len = write (fd, bp, buf_len)) < 0) - { - err = ctf_set_errno (fp, errno); - ctf_err_warn (fp, 0, 0, _("ctf_compress_write: error writing")); - goto ret; - } - buf_len -= len; - bp += len; - } - -ret: - free (buf); - return err; + return ctf_write_thresholded (fp, fd, (size_t) -1); } diff --git a/libctf/testsuite/libctf-writable/ctf-compressed.c b/libctf/testsuite/libctf-writable/ctf-compressed.c new file mode 100644 index 00000000000..4769cdb97f2 --- /dev/null +++ b/libctf/testsuite/libctf-writable/ctf-compressed.c @@ -0,0 +1,158 @@ +/* Make sure linking and writing an archive with a low threshold correctly + compresses it. (This tests for two bugs, one where archives weren't + serialized regardless of their threshold, and another where nothing was.) */ + +#include +#include +#include +#include + +int +main (int argc, char *argv[]) +{ + ctf_dict_t *in1; + ctf_dict_t *in2; + ctf_dict_t *fp; + ctf_dict_t *dump_fp; + ctf_archive_t *arc1, *arc2; + ctf_archive_t *final_arc; + ctf_sect_t s1, s2; + ctf_encoding_t encoding = { CTF_INT_SIGNED, 0, sizeof (char) }; + ctf_encoding_t encoding2 = { CTF_INT_SIGNED, 0, sizeof (long long) }; + unsigned char *buf1, *buf2, *buf3; + size_t buf1_sz, buf2_sz, buf3_sz; + ctf_dump_state_t *dump_state = NULL; + ctf_next_t *i = NULL; + int err; + + /* Linking does not currently work on mingw because of an unreliable tmpfile + implementation on that platform (see + https://github.com/msys2/MINGW-packages/issues/18878). Simply skip for + now. */ + +#ifdef __MINGW32__ + printf ("UNSUPPORTED: platform bug breaks ctf_link\n"); + return 0; +#else + + if ((fp = ctf_create (&err)) == NULL) + goto create_err; + + if ((in1 = ctf_create (&err)) == NULL) + goto create_err; + + if ((in2 = ctf_create (&err)) == NULL) + goto create_err; + + /* Force a conflict to get an archive created. */ + + if ((ctf_add_integer (in1, CTF_ADD_ROOT, "foo", &encoding)) == CTF_ERR) + { + fprintf (stderr, "Cannot add: %s\n", ctf_errmsg (ctf_errno (in1))); + return 1; + } + + if ((ctf_add_integer (in2, CTF_ADD_ROOT, "foo", &encoding2)) == CTF_ERR) + { + fprintf (stderr, "Cannot add: %s\n", ctf_errmsg (ctf_errno (in2))); + return 1; + } + + /* Write them out and read them back in, to turn them into archives. + This would be unnecessary if ctf_link_add() were public :( */ + if ((buf1 = ctf_write_mem (in1, &buf1_sz, -1)) == NULL) + { + fprintf (stderr, "Cannot serialize: %s\n", ctf_errmsg (ctf_errno (in1))); + return 1; + } + + if ((buf2 = ctf_write_mem (in2, &buf2_sz, -1)) == NULL) + { + fprintf (stderr, "Cannot serialize: %s\n", ctf_errmsg (ctf_errno (in2))); + return 1; + } + + s1.cts_name = "foo"; + s2.cts_name = "bar"; + s1.cts_data = (void *) buf1; + s2.cts_data = (void *) buf2; + s1.cts_size = buf1_sz; + s2.cts_size = buf2_sz; + s1.cts_entsize = 64; /* Unimportant. */ + s2.cts_entsize = 64; /* Unimportant. */ + + if ((arc1 = ctf_arc_bufopen (&s1, NULL, NULL, &err)) == NULL || + (arc2 = ctf_arc_bufopen (&s2, NULL, NULL, &err)) == NULL) + goto open_err; + + ctf_dict_close (in1); + ctf_dict_close (in2); + + /* Link them together. */ + + if (ctf_link_add_ctf (fp, arc1, "a") < 0 || + ctf_link_add_ctf (fp, arc2, "b") < 0) + goto link_err; + + if (ctf_link (fp, 0) < 0) + goto link_err; + + /* Write them out. We need a new buf here, because the archives are still + using the other two bufs. */ + + if ((buf3 = ctf_link_write (fp, &buf3_sz, 1)) == NULL) + goto link_err; + + /* Read them back in. */ + + s1.cts_data = (void *) buf3; + s1.cts_size = buf3_sz; + + if ((final_arc = ctf_arc_bufopen (&s1, NULL, NULL, &err)) == NULL) + goto open_err; + + if (ctf_archive_count (final_arc) != 2) + { + fprintf (stderr, "Archive is the wrong length: %zi.\n", ctf_archive_count (final_arc)); + return -1; + } + + /* Dump the header of each archive member, and search for CTF_F_COMPRESS in + the resulting dump. */ + while ((dump_fp = ctf_archive_next (final_arc, &i, NULL, 0, &err)) != NULL) + { + char *dumpstr; + + while ((dumpstr = ctf_dump (dump_fp, &dump_state, CTF_SECT_HEADER, + NULL, NULL)) != NULL) + { + if (strstr (dumpstr, "CTF_F_COMPRESS") != NULL) + printf ("Output is compressed.\n"); + free (dumpstr); + } + ctf_dict_close (dump_fp); + } + if (err != ECTF_NEXT_END) + { + fprintf (stderr, "Archive iteation error: %s\n", ctf_errmsg (err)); + return 1; + } + + ctf_arc_close (final_arc); + free (buf1); + free (buf2); + free (buf3); + ctf_dict_close (fp); + return 0; + + create_err: + fprintf (stderr, "Cannot create: %s\n", ctf_errmsg (err)); + return 1; + open_err: + fprintf (stderr, "Cannot open: %s\n", ctf_errmsg (err)); + return 1; + link_err: + fprintf (stderr, "Cannot link: %s\n", ctf_errmsg (ctf_errno (fp))); + return 1; +#endif +} diff --git a/libctf/testsuite/libctf-writable/ctf-compressed.lk b/libctf/testsuite/libctf-writable/ctf-compressed.lk new file mode 100644 index 00000000000..cf611c36f97 --- /dev/null +++ b/libctf/testsuite/libctf-writable/ctf-compressed.lk @@ -0,0 +1,2 @@ +Output is compressed. +Output is compressed.