libctf, include: add ctf_dict_set_flag: less enum dup checking by default

The recent change to detect duplicate enum values and return ECTF_DUPLICATE
when found turns out to perturb a great many callers.  In particular, the
pahole-created kernel BTF has the same problem we historically did, and
gleefully emits duplicated enum constants in profusion.  Handling the
resulting duplicate errors from BTF -> CTF converters reasonably is
unreasonably difficult (it amounts to forcing them to skip some types or
reimplement the deduplicator).

So let's step back a bit.  What we care about mostly is that the
deduplicator treat enums with conflicting enumeration constants as
conflicting types: programs that want to look up enumeration constant ->
value mappings using the new APIs to do so might well want the same checks
to apply to any ctf_add_* operations they carry out (and since they're
*using* the new APIs, added at the same time as this restriction was
imposed, there is likely to be no negative consequence of this).

So we want some way to allow processes that know about duplicate detection
to opt into it, while allowing everyone else to stay clear of it: but we
want ctf_link to get this behaviour even if its caller has opted out.

So add a new concept to the API: dict-wide CTF flags, set via
ctf_dict_set_flag, obtained via ctf_dict_get_flag.  They are not bitflags
but simple arbitrary integers, each with an on/off value, stored in an
unspecified manner (the one current flag is translated into an LCTF_* flag
value in the internal ctf_dict ctf_flags word).  If you pass in an invalid flag or value
you get a new ECTF_BADFLAG error, so the caller can easily tell whether
flags added in future are valid with a particular libctf or not.

We check this flag in ctf_add_enumerator, and set it around the link
(including on child per-CU dicts).  The newish enumerator-iteration test is
souped up to check the semantics of the flag as well.

The fact that the flag can be set and unset at any time has curious
consequences. You can unset the flag, insert a pile of duplicates, then set
it and expect the new duplicates to be detected, not only by
ctf_add_enumerator but also by ctf_lookup_enumerator.  This means we now
have to maintain the ctf_names and conflicting_enums enum-duplication
tracking as new enums are added, not purely as the dict is opened.
Move that code out of init_static_types_internal and into a new
ctf_track_enumerator function that addition can also call.

(None of this affects the file format or serialization machinery, which has
to be able to handle duplicate enumeration constants no matter what.)

include/
	* ctf-api.h (CTF_ERRORS) [ECTF_BADFLAG]: New.
	(ECTF_NERR): Update.
	(CTF_STRICT_NO_DUP_ENUMERATORS): New flag.
	(ctf_dict_set_flag): New function.
	(ctf_dict_get_flag): Likewise.

libctf/
	* ctf-impl.h (LCTF_STRICT_NO_DUP_ENUMERATORS): New flag.
	(ctf_track_enumerator): Declare.
	* ctf-dedup.c (ctf_dedup_emit_type): Set it.
	* ctf-link.c (ctf_create_per_cu): Likewise.
	(ctf_link_deduplicating_per_cu): Likewise.
	(ctf_link): Likewise.
	(ctf_link_write): Likewise.
	* ctf-subr.c (ctf_dict_set_flag): New function.
	(ctf_dict_get_flag): New function.
	* ctf-open.c (init_static_types_internal): Move enum tracking to...
	* ctf-create.c (ctf_track_enumerator): ... this new function.
	(ctf_add_enumerator): Call it.
	* libctf.ver: Add the new functions.
	* testsuite/libctf-lookup/enumerator-iteration.c: Test them.
This commit is contained in:
Nick Alcock 2024-07-26 21:38:24 +01:00
parent e1a8c74214
commit 3c21a5bedb
No known key found for this signature in database
9 changed files with 195 additions and 47 deletions

View File

@ -243,7 +243,8 @@ typedef struct ctf_snapshot_id
_CTF_ITEM (ECTF_FLAGS, "CTF header contains flags unknown to libctf.") \
_CTF_ITEM (ECTF_NEEDSBFD, "This feature needs a libctf with BFD support.") \
_CTF_ITEM (ECTF_INCOMPLETE, "Type is not a complete type.") \
_CTF_ITEM (ECTF_NONAME, "Type name must not be empty.")
_CTF_ITEM (ECTF_NONAME, "Type name must not be empty.") \
_CTF_ITEM (ECTF_BADFLAG, "Invalid CTF dict flag specified.")
#define ECTF_BASE 1000 /* Base value for libctf errnos. */
@ -256,7 +257,7 @@ _CTF_ERRORS
#undef _CTF_FIRST
};
#define ECTF_NERR (ECTF_NONAME - ECTF_BASE + 1) /* Count of CTF errors. */
#define ECTF_NERR (ECTF_BADFLAG - ECTF_BASE + 1) /* Count of CTF errors. */
/* The CTF data model is inferred to be the caller's data model or the data
model of the given object, unless ctf_setmodel is explicitly called. */
@ -282,6 +283,12 @@ _CTF_ERRORS
#define CTF_MN_RECURSE 0x1 /* Recurse into unnamed members. */
/* Flags for ctf_dict_set_flag. */
/* If set, duplicate enumerators in a single dict fail with ECTF_DUPLICATE. */
#define CTF_STRICT_NO_DUP_ENUMERATORS 0x1
/* These typedefs are used to define the signature for callback functions that
can be used with the iteration and visit functions below. There is also a
family of iteration functions that do not require callbacks. */
@ -350,6 +357,11 @@ extern ctf_archive_t *ctf_open (const char *filename,
const char *target, int *errp);
extern void ctf_close (ctf_archive_t *);
/* Set or unset dict-wide boolean flags, and get the value of these flags. */
extern int ctf_dict_set_flag (ctf_dict_t *, uint64_t flag, int set);
extern int ctf_dict_get_flag (ctf_dict_t *, uint64_t flag);
/* Return the data, symbol, or string sections used by a given CTF dict. */
extern ctf_sect_t ctf_getdatasect (const ctf_dict_t *);
extern ctf_sect_t ctf_getsymsect (const ctf_dict_t *);

View File

@ -1071,7 +1071,13 @@ ctf_add_enumerator (ctf_dict_t *fp, ctf_id_t enid, const char *name,
if the enum they are part of is a root-visible type. */
if (root == CTF_ADD_ROOT && ctf_dynhash_lookup (fp->ctf_names, name))
return (ctf_set_errno (ofp, ECTF_DUPLICATE));
{
if (fp->ctf_flags & LCTF_STRICT_NO_DUP_ENUMERATORS)
return (ctf_set_errno (ofp, ECTF_DUPLICATE));
if (ctf_track_enumerator (fp, enid, name) < 0)
return (ctf_set_errno (ofp, ctf_errno (fp)));
}
if (kind != CTF_K_ENUM)
return (ctf_set_errno (ofp, ECTF_NOTENUM));
@ -1094,7 +1100,7 @@ ctf_add_enumerator (ctf_dict_t *fp, ctf_id_t enid, const char *name,
non-root-visible types, since the duplicate detection above does the job
for root-visible types just fine. */
if (root == CTF_ADD_NONROOT)
if (root == CTF_ADD_NONROOT && (fp->ctf_flags & LCTF_STRICT_NO_DUP_ENUMERATORS))
{
size_t i;
@ -1429,6 +1435,42 @@ ctf_add_func_sym (ctf_dict_t *fp, const char *name, ctf_id_t id)
return (ctf_add_funcobjt_sym (fp, 1, name, id));
}
/* Record the enumeration constant CTE_NAME, seen in the enum TYPE, as an
   identifier in FP.

   If nothing is yet registered under that name in ctf_names, the constant is
   entered there, citing TYPE.  Otherwise (the name is already taken, whether
   by another enum's constant or by a type) the constant is noted in
   ctf_conflicting_enums instead.

   This runs for every enumeration type at open time and again whenever an
   enumerator is added, because the strict-enum flag may be switched on at any
   moment and the tables must already reflect enums added in the interim.

   Returns 0 on success, -1 on failure with the error set on FP.  */
int
ctf_track_enumerator (ctf_dict_t *fp, ctf_id_t type, const char *cte_name)
{
  uint32_t str_ref;
  int ret;

  /* Name already in use: remember it as a conflicting constant.  */
  if (ctf_dynhash_lookup_type (fp->ctf_names, cte_name) != 0)
    {
      ret = ctf_dynset_insert (fp->ctf_conflicting_enums, (void *) cte_name);
      if (ret == 0)
	return 0;

      /* The dynset reports a negated error code: flip the sign for FP.  */
      ctf_set_errno (fp, ret * -1);
      return -1;
    }

  /* First sighting of this name: intern the string, then register it.  */
  str_ref = ctf_str_add (fp, cte_name);
  if (str_ref == 0)
    return -1;			/* errno is set for us.  */

  if (ctf_dynhash_insert_type (fp, fp->ctf_names, type, str_ref) != 0)
    return -1;			/* errno is set for us.  */

  return 0;
}
typedef struct ctf_bundle
{
ctf_dict_t *ctb_dict; /* CTF dict handle. */

View File

@ -2683,6 +2683,7 @@ ctf_dedup_emit_type (const char *hval, ctf_dict_t *output, ctf_dict_t **inputs,
return ctf_set_errno (output, err);
}
target->ctf_flags |= LCTF_STRICT_NO_DUP_ENUMERATORS;
ctf_import_unref (target, output);
if (ctf_cuname (input) != NULL)
ctf_cuname_set (target, ctf_cuname (input));

View File

@ -602,7 +602,8 @@ struct ctf_next
((fp)->ctf_dictops->ctfo_get_vbytes(fp, kind, size, vlen))
#define LCTF_CHILD 0x0001 /* CTF dict is a child. */
#define LCTF_LINKING 0x0002 /* CTF link is underway: respect ctf_link_flags. */
#define LCTF_LINKING 0x0002 /* CTF link is underway: respect ctf_link_flags. */
#define LCTF_STRICT_NO_DUP_ENUMERATORS 0x0004 /* Duplicate enums prohibited. */
extern ctf_dynhash_t *ctf_name_table (ctf_dict_t *, int);
extern const ctf_type_t *ctf_lookup_by_id (ctf_dict_t **, ctf_id_t);
@ -713,6 +714,8 @@ extern int ctf_add_variable_forced (ctf_dict_t *, const char *, ctf_id_t);
extern int ctf_add_funcobjt_sym_forced (ctf_dict_t *, int is_function,
const char *, ctf_id_t);
extern int ctf_track_enumerator (ctf_dict_t *, ctf_id_t, const char *);
extern int ctf_dedup_atoms_init (ctf_dict_t *);
extern int ctf_dedup (ctf_dict_t *, ctf_dict_t **, uint32_t ninputs,
int cu_mapped);

View File

@ -336,6 +336,8 @@ ctf_create_per_cu (ctf_dict_t *fp, ctf_dict_t *input, const char *cu_name)
return NULL;
}
/* The deduplicator is ready for strict enumerator value checking. */
cu_fp->ctf_flags |= LCTF_STRICT_NO_DUP_ENUMERATORS;
ctf_import_unref (cu_fp, fp);
if ((dynname = ctf_new_per_cu_name (fp, ctf_name)) == NULL)
@ -1233,6 +1235,9 @@ ctf_link_deduplicating_per_cu (ctf_dict_t *fp)
goto err_inputs;
}
/* The deduplicator is ready for strict enumerator value checking. */
out->ctf_flags |= LCTF_STRICT_NO_DUP_ENUMERATORS;
/* Share the atoms table to reduce memory usage. */
out->ctf_dedup_atoms = fp->ctf_dedup_atoms_alloc;
@ -1498,6 +1503,7 @@ int
ctf_link (ctf_dict_t *fp, int flags)
{
int err;
int oldflags = fp->ctf_flags;
fp->ctf_link_flags = flags;
@ -1515,9 +1521,9 @@ ctf_link (ctf_dict_t *fp, int flags)
if (fp->ctf_link_outputs == NULL)
return ctf_set_errno (fp, ENOMEM);
fp->ctf_flags |= LCTF_LINKING;
/* Mark the link as underway and force strict duplicate-enumerator checking
   for its duration.  The two flags are distinct bits, so they must be OR'd
   together: AND-ing them yields zero and would set neither.  */
fp->ctf_flags |= LCTF_LINKING | LCTF_STRICT_NO_DUP_ENUMERATORS;
ctf_link_deduplicating (fp);
fp->ctf_flags &= ~LCTF_LINKING;
fp->ctf_flags = oldflags;
if ((ctf_errno (fp) != 0) && (ctf_errno (fp) != ECTF_NOCTFDATA))
return -1;
@ -1537,14 +1543,14 @@ ctf_link (ctf_dict_t *fp, int flags)
const char *to = (const char *) k;
if (ctf_create_per_cu (fp, NULL, to) == NULL)
{
fp->ctf_flags &= ~LCTF_LINKING;
fp->ctf_flags = oldflags;
ctf_next_destroy (i);
return -1; /* Errno is set for us. */
}
}
if (err != ECTF_NEXT_END)
{
fp->ctf_flags &= ~LCTF_LINKING;
fp->ctf_flags = oldflags;
ctf_err_warn (fp, 1, err, _("iteration error creating empty CUs"));
return ctf_set_errno (fp, err);
}
@ -2040,9 +2046,14 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
goto err_no;
}
/* Turn off the is-linking flag on all the dicts in this link. */
/* Turn off the is-linking flag on all the dicts in this link: if the strict enum
checking flag is off on the parent, turn it off on all the children too. */
for (i = 0; i < arg.i; i++)
arg.files[i]->ctf_flags &= ~LCTF_LINKING;
{
arg.files[i]->ctf_flags &= ~LCTF_LINKING;
if (!(fp->ctf_flags & LCTF_STRICT_NO_DUP_ENUMERATORS))
arg.files[i]->ctf_flags &= ~LCTF_STRICT_NO_DUP_ENUMERATORS;
}
*size = fsize;
free (arg.names);
@ -2061,9 +2072,13 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
err_no:
ctf_set_errno (fp, errno);
/* Turn off the is-linking flag on all the dicts in this link. */
/* Turn off the is-linking flag on all the dicts in this link, as above. */
for (i = 0; i < arg.i; i++)
arg.files[i]->ctf_flags &= ~LCTF_LINKING;
{
arg.files[i]->ctf_flags &= ~LCTF_LINKING;
if (!(fp->ctf_flags & LCTF_STRICT_NO_DUP_ENUMERATORS))
arg.files[i]->ctf_flags &= ~LCTF_STRICT_NO_DUP_ENUMERATORS;
}
err:
free (buf);
if (f)

View File

@ -1024,9 +1024,8 @@ init_static_types_internal (ctf_dict_t *fp, ctf_header_t *cth,
ctf_dprintf ("%lu total types processed\n", fp->ctf_typemax);
/* In the third pass, we traverse the enums we spotted earlier and add all
the enumeration constants therein either to the types table (if no
type exists with that name) or to ctf_conflciting_enums (otherwise).
/* In the third pass, we traverse the enums we spotted earlier and track all
the enumeration constants to aid in future detection of duplicates.
Doing this in a third pass is necessary to avoid the case where an
enum appears with a constant FOO, then later a type named FOO appears,
@ -1040,36 +1039,12 @@ init_static_types_internal (ctf_dict_t *fp, ctf_header_t *cth,
while ((cte_name = ctf_enum_next (fp, enum_id, &i_constants, NULL)) != NULL)
{
/* Add all the enumeration constants as identifiers. They all appear
as types that cite the original enum.
Constants that appear in more than one enum, or which are already
the names of types, appear in ctf_conflicting_enums as well. */
if (ctf_dynhash_lookup_type (fp->ctf_names, cte_name) == 0)
if (ctf_track_enumerator (fp, enum_id, cte_name) < 0)
{
uint32_t name = ctf_str_add (fp, cte_name);
if (name == 0)
goto enum_err;
err = ctf_dynhash_insert_type (fp, fp->ctf_names, enum_id, name);
ctf_next_destroy (i_constants);
ctf_next_destroy (i);
return ctf_errno (fp);
}
else
{
err = ctf_dynset_insert (fp->ctf_conflicting_enums, (void *)
cte_name);
if (err != 0)
goto enum_err;
}
continue;
enum_err:
ctf_set_errno (fp, err);
ctf_next_destroy (i_constants);
ctf_next_destroy (i);
return ctf_errno (fp);
}
if (ctf_errno (fp) != ECTF_NEXT_END)
{

View File

@ -152,6 +152,43 @@ ctf_version (int version)
return _libctf_version;
}
/* Get and set CTF dict-wide flags.  Anything unrecognized is rejected with
   ECTF_BADFLAG rather than silently ignored, so that callers can determine
   programmatically which flags are valid.  */

/* Turn the dict-wide FLAG on (SET == 1) or off (SET == 0) in FP.  Returns 0 on
   success.  Any other value of SET, or an unknown FLAG, returns the result of
   ctf_set_errno with ECTF_BADFLAG.  */
int
ctf_dict_set_flag (ctf_dict_t *fp, uint64_t flag, int set)
{
  /* Only a strict on/off value is acceptable.  */
  if (set != 0 && set != 1)
    return (ctf_set_errno (fp, ECTF_BADFLAG));

  switch (flag)
    {
    case CTF_STRICT_NO_DUP_ENUMERATORS:
      /* The public flag value is mapped onto the internal LCTF_* bit.  */
      fp->ctf_flags = set
	? (fp->ctf_flags | LCTF_STRICT_NO_DUP_ENUMERATORS)
	: (fp->ctf_flags & ~LCTF_STRICT_NO_DUP_ENUMERATORS);
      return 0;
    default:
      break;
    }

  return (ctf_set_errno (fp, ECTF_BADFLAG));
}
/* Report whether the dict-wide FLAG is currently set in FP: 1 if it is on,
   0 if it is off.  An unknown FLAG returns the result of ctf_set_errno with
   ECTF_BADFLAG.  */
int
ctf_dict_get_flag (ctf_dict_t *fp, uint64_t flag)
{
  if (flag == CTF_STRICT_NO_DUP_ENUMERATORS)
    return (fp->ctf_flags & LCTF_STRICT_NO_DUP_ENUMERATORS) ? 1 : 0;

  return (ctf_set_errno (fp, ECTF_BADFLAG));
}
void
libctf_init_debug (void)
{

View File

@ -204,4 +204,6 @@ LIBCTF_1.3 {
ctf_lookup_enumerator;
ctf_lookup_enumerator_next;
ctf_arc_lookup_enumerator_next;
ctf_dict_set_flag;
ctf_dict_get_flag;
} LIBCTF_1.2;

View File

@ -56,19 +56,40 @@ main (int argc, char *argv[])
if ((ctf = ctf_open (argv[1], NULL, &err)) == NULL)
goto open_err;
/* Look for all instances of ENUMSAMPLE2_1, and add some new enums to all
/* Look for all instances of ENUMSAMPLE2_2, and add some new enums to all
dicts found, to test dynamic enum iteration as well as static.
Add two enums with a different name and constants to any that should
already be there (one hidden), and one with the same constants, but hidden,
to test ctf_lookup_enumerator_next()'s multiple-lookup functionality and
ctf_lookup_enumerator() in the presence of hidden types. */
ctf_lookup_enumerator() in the presence of hidden types.
This also tests that you can add to enums under iteration without causing
disaster. */
printf ("First iteration: addition of enums.\n");
while ((type = ctf_arc_lookup_enumerator_next (ctf, "IENUMSAMPLE2_2", &i,
&val, &fp, &err)) != CTF_ERR)
{
char *foo;
int dynadd2_value;
int old_dynadd2_flag;
/* Make sure that getting and setting a garbage flag, and setting one to a
garbage value, fails properly. */
if (ctf_dict_set_flag (fp, CTF_STRICT_NO_DUP_ENUMERATORS, 666) >= 0
|| ctf_errno (fp) != ECTF_BADFLAG)
fprintf (stderr, "Invalid flag value setting did not fail as it ought to\n");
if (ctf_dict_set_flag (fp, 0, 1) >= 0 || ctf_errno (fp) != ECTF_BADFLAG)
fprintf (stderr, "Invalid flag setting did not fail as it ought to\n");
if (ctf_dict_get_flag (fp, 0) >= 0 || ctf_errno (fp) != ECTF_BADFLAG)
fprintf (stderr, "Invalid flag getting did not fail as it ought to\n");
/* Set it strict for now. */
if (ctf_dict_set_flag (fp, CTF_STRICT_NO_DUP_ENUMERATORS, 1) < 0)
goto set_flag_err;
printf ("IENUMSAMPLE2_2 in %s has value %li\n",
foo = ctf_type_aname (fp, type), (long int) val);
@ -79,8 +100,10 @@ main (int argc, char *argv[])
if (ctf_add_enumerator (fp, type, "DYNADD", counter += 10) < 0)
goto enumerator_add_err;
if (ctf_add_enumerator (fp, type, "DYNADD2", counter += 10) < 0)
goto enumerator_add_err;
dynadd2_value = counter;
/* Make sure that overlapping enumerator addition fails as it should. */
@ -88,6 +111,32 @@ main (int argc, char *argv[])
|| ctf_errno (fp) != ECTF_DUPLICATE)
fprintf (stderr, "Duplicate enumerator addition did not fail as it ought to\n");
/* Make sure that it still fails if you set an enum value to the value it
already has. */
if (ctf_add_enumerator (fp, type, "DYNADD2", dynadd2_value) >= 0
|| ctf_errno (fp) != ECTF_DUPLICATE)
fprintf (stderr, "Duplicate enumerator addition did not fail as it ought to\n");
/* Flip the strict flag and try again. This time, it should succeed. */
if ((old_dynadd2_flag = ctf_dict_get_flag (fp, CTF_STRICT_NO_DUP_ENUMERATORS)) < 0)
goto get_flag_err;
if (ctf_dict_set_flag (fp, CTF_STRICT_NO_DUP_ENUMERATORS, 0) < 0)
goto set_flag_err;
if (ctf_add_enumerator (fp, type, "DYNADD2", dynadd2_value) < 0)
goto enumerator_add_err;
/* Flip it again and try *again*. This time it should fail again. */
if (ctf_dict_set_flag (fp, CTF_STRICT_NO_DUP_ENUMERATORS, old_dynadd2_flag) < 0)
goto set_flag_err;
if (ctf_add_enumerator (fp, type, "DYNADD2", dynadd2_value) >= 0
|| ctf_errno (fp) != ECTF_DUPLICATE)
fprintf (stderr, "Duplicate enumerator addition did not fail as it ought to\n");
if ((type = ctf_add_enum (fp, CTF_ADD_NONROOT, "ie4_hidden")) == CTF_ERR)
goto enum_add_err;
@ -104,12 +153,18 @@ main (int argc, char *argv[])
if (ctf_add_enumerator (fp, type, "DYNADD2", counter += 10) < 0)
goto enumerator_add_err;
/* Look them up via ctf_lookup_enumerator. */
/* Look them up via ctf_lookup_enumerator. DYNADD2 should fail because
it has duplicate enumerators. */
if (ctf_lookup_enumerator (fp, "DYNADD", &val) == CTF_ERR)
goto enumerator_lookup_err;
printf ("direct lookup: DYNADD value: %i\n", (int) val);
if ((err = ctf_lookup_enumerator (fp, "DYNADD2", &val)) >= 0 ||
ctf_errno (fp) != ECTF_DUPLICATE)
fprintf (stderr, "Duplicate enumerator lookup did not fail as it ought to: %i, %s\n",
err, ctf_errmsg (ctf_errno (fp)));
if ((type = ctf_lookup_enumerator (fp, "DYNADD3", &val) != CTF_ERR) ||
ctf_errno (fp) != ECTF_NOENUMNAM)
{
@ -164,4 +219,10 @@ main (int argc, char *argv[])
fprintf (stderr, "Cannot look up enumerator in dict \"%s\": %s\n",
ctf_cuname (fp) ? ctf_cuname (fp) : "(null: parent)", ctf_errmsg (ctf_errno (fp)));
return 1;
get_flag_err:
fprintf (stderr, "ctf_dict_get_flag failed: %s\n", ctf_errmsg (ctf_errno (fp)));
return 1;
set_flag_err:
fprintf (stderr, "ctf_dict_set_flag failed: %s\n", ctf_errmsg (ctf_errno (fp)));
return 1;
}