Fix infinite loop in file inferencing

re: Issue https://github.com/Unidata/netcdf-c/issues/2573

The file type inferencer in libdispatch/dinference.c has a simple
forward inference mechanism so that the occurrence of certain mode
values in a URL fragment implies inclusion of additional mode values.
This kind of inference is notorious for leading to cycles if not
careful. Unfortunately, this occurred in the one in dinference.c.

This was fixed by providing a more complicated, but more reliable inference
mechanism.

## Misc. Other Changes
* Found and fixed a couple of memory leaks.
* There is a recent problem in building HDF4 support on github actions. Fixed by using the internal HDF4 xdr capability.
* Some filter-related code was not being properly ifdef'd with ENABLE_NCZARRA_FILTERS.
This commit is contained in:
Dennis Heimbigner 2022-12-18 13:18:00 -07:00
parent 0fc2c817b2
commit a03bb5e601
10 changed files with 177 additions and 86 deletions

View File

@ -7,7 +7,7 @@
name: Run macOS-based netCDF Tests
on: [pull_request, workflow_dispatch]
on: [pull_request,workflow_dispatch]
jobs:

View File

@ -4,7 +4,7 @@
name: Run Ubuntu/Linux netCDF Tests
on: [pull_request, workflow_dispatch]
on: [pull_request,workflow_dispatch]
jobs:
@ -42,7 +42,7 @@ jobs:
wget https://support.hdfgroup.org/ftp/HDF/releases/HDF4.2.15/src/hdf-4.2.15.tar.bz2
tar -jxf hdf-4.2.15.tar.bz2
pushd hdf-4.2.15
./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib
./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib --enable-hdf4-xdr
make -j
make install -j
popd
@ -89,7 +89,7 @@ jobs:
wget https://support.hdfgroup.org/ftp/HDF/releases/HDF4.2.15/src/hdf-4.2.15.tar.bz2
tar -jxf hdf-4.2.15.tar.bz2
pushd hdf-4.2.15
CC=mpicc ./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib --enable-parallel
CC=mpicc ./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib --enable-parallel --enable-hdf4-xdr
make -j
make install -j
popd

View File

@ -1,6 +1,6 @@
name: Run Cygwin-based tests
on: [pull_request, workflow_dispatch]
on: [pull_request,workflow_dispatch]
env:
SHELLOPTS: igncr

View File

@ -9,7 +9,7 @@ name: Run MSYS2, MinGW64-based Tests
env:
CPPFLAGS: "-D_BSD_SOURCE"
on: [pull_request, workflow_dispatch]
on: [pull_request,workflow_dispatch]
jobs:

View File

@ -1839,19 +1839,16 @@ NC_create(const char *path0, int cmode, size_t initialsz,
TRACE(nc_create);
if(path0 == NULL)
return NC_EINVAL;
{stat = NC_EINVAL; goto done;}
/* Check mode flag for sanity. */
if ((stat = check_create_mode(cmode)))
return stat;
if ((stat = check_create_mode(cmode))) goto done;
/* Initialize the library. The available dispatch tables
* will depend on how netCDF was built
* (with/without netCDF-4, DAP, CDMREMOTE). */
if(!NC_initialized)
{
if ((stat = nc_initialize()))
return stat;
if(!NC_initialized) {
if ((stat = nc_initialize())) goto done;
}
{
@ -1863,10 +1860,7 @@ NC_create(const char *path0, int cmode, size_t initialsz,
memset(&model,0,sizeof(model));
newpath = NULL;
if((stat = NC_infermodel(path,&cmode,1,useparallel,NULL,&model,&newpath))) {
nullfree(newpath);
goto done;
}
if((stat = NC_infermodel(path,&cmode,1,useparallel,NULL,&model,&newpath))) goto done;
if(newpath) {
nullfree(path);
path = newpath;
@ -1918,7 +1912,7 @@ NC_create(const char *path0, int cmode, size_t initialsz,
dispatcher = NC3_dispatch_table;
break;
default:
return NC_ENOTNC;
{stat = NC_ENOTNC; goto done;}
}
/* Create the NC* instance and insert its dispatcher and model */
@ -1937,6 +1931,7 @@ NC_create(const char *path0, int cmode, size_t initialsz,
}
done:
nullfree(path);
nullfree(newpath);
return stat;
}
@ -1980,12 +1975,12 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp,
TRACE(nc_open);
if(!NC_initialized) {
stat = nc_initialize();
if(stat) return stat;
if(stat) goto done;
}
/* Check inputs. */
if (!path0)
return NC_EINVAL;
{stat = NC_EINVAL; goto done;}
/* Capture the inmemory related flags */
mmap = ((omode & NC_MMAP) == NC_MMAP);

View File

@ -143,7 +143,15 @@ static const struct MACRODEF {
{NULL,NULL,{NULL}}
};
/* Mode inferences: if mode contains key, then add the inference and infer again */
/*
Mode inferences: if mode contains key value, then add the inferred value;
Warning: be careful how this list is constructed to avoid infinite inferences.
In order to (mostly) avoid that consequence, any attempt to
infer a value that is already present will be ignored.
This effectively means that the inference graph
must be a DAG and may not have cycles.
You have been warned.
*/
static const struct MODEINFER {
char* key;
char* inference;
@ -151,6 +159,7 @@ static const struct MODEINFER {
{"zarr","nczarr"},
{"xarray","zarr"},
{"noxarray","nczarr"},
{"noxarray","zarr"},
{NULL,NULL}
};
@ -202,6 +211,7 @@ static int processmacros(NClist** fraglistp);
static char* envvlist2string(NClist* pairs, const char*);
static void set_default_mode(int* cmodep);
static int parseonchar(const char* s, int ch, NClist* segments);
static int mergelist(NClist** valuesp);
static int openmagic(struct MagicFile* file);
static int readmagic(struct MagicFile* file, long pos, char* magic);
@ -217,8 +227,9 @@ static int parsepair(const char* pair, char** keyp, char** valuep);
static NClist* parsemode(const char* modeval);
static const char* getmodekey(const NClist* envv);
static int replacemode(NClist* envv, const char* newval);
static int inferone(const char* mode, NClist* newmodes);
static void infernext(NClist* current, NClist* next);
static int negateone(const char* mode, NClist* modes);
static void cleanstringlist(NClist* strs, int caseinsensitive);
/*
If the path looks like a URL, then parse it, reformat it.
@ -416,28 +427,6 @@ envvlist2string(NClist* envv, const char* delim)
return result;
}
/* Convert a list into a comma'd string */
static char*
list2string(NClist* list)
{
int i;
NCbytes* buf = NULL;
char* result = NULL;
if(list == NULL || nclistlength(list)==0) return strdup("");
buf = ncbytesnew();
for(i=0;i<nclistlength(list);i++) {
const char* m = nclistget(list,i);
if(m == NULL || strlen(m) == 0) continue;
if(i > 0) ncbytescat(buf,",");
ncbytescat(buf,m);
}
result = ncbytesextract(buf);
ncbytesfree(buf);
if(result == NULL) result = strdup("");
return result;
}
/* Given a mode= argument, fill in the impl */
static int
processmodearg(const char* arg, NCmodel* model)
@ -504,9 +493,10 @@ processinferences(NClist* fraglenv)
{
int stat = NC_NOERR;
const char* modeval = NULL;
NClist* modes = NULL;
NClist* newmodes = nclistnew();
int i,inferred = 0;
NClist* currentmodes = NULL;
NClist* nextmodes = nclistnew();
int i;
char* newmodeval = NULL;
if(fraglenv == NULL || nclistlength(fraglenv) == 0) goto done;
@ -515,22 +505,53 @@ processinferences(NClist* fraglenv)
if((modeval = getmodekey(fraglenv))==NULL) goto done;
/* Get the mode as list */
modes = parsemode(modeval);
currentmodes = parsemode(modeval);
/* Repeatedly walk the mode list until no more new positive inferences */
do {
for(i=0;i<nclistlength(modes);i++) {
const char* mode = nclistget(modes,i);
inferred = inferone(mode,newmodes);
nclistpush(newmodes,strdup(mode)); /* keep key */
if(!inferred) nclistpush(newmodes,strdup(mode));
#ifdef DEBUG
printlist(currentmodes,"processinferences: initial mode list");
#endif
/* Do what amounts to breadth first inferencing down the inference DAG. */
for(;;) {
NClist* tmp = NULL;
/* Compute the next set of inferred modes */
#ifdef DEBUG
printlist(currentmodes,"processinferences: current mode list");
#endif
infernext(currentmodes,nextmodes);
#ifdef DEBUG
printlist(nextmodes,"processinferences: next mode list");
#endif
/* move current modes into list of newmodes */
for(i=0;i<nclistlength(currentmodes);i++) {
nclistpush(newmodes,nclistget(currentmodes,i));
}
} while(inferred);
nclistsetlength(currentmodes,0); /* clear current mode list */
if(nclistlength(nextmodes) == 0) break; /* nothing more to do */
#ifdef DEBUG
printlist(newmodes,"processinferences: new mode list");
#endif
/* Swap current and next */
tmp = currentmodes;
currentmodes = nextmodes;
nextmodes = tmp;
tmp = NULL;
}
/* cleanup any unused elements in currenmodes */
nclistclearall(currentmodes);
/* Ensure no duplicates */
cleanstringlist(newmodes,1);
#ifdef DEBUG
printlist(newmodes,"processinferences: final inferred mode list");
#endif
/* Remove negative inferences */
for(i=0;i<nclistlength(modes);i++) {
const char* mode = nclistget(modes,i);
inferred = negateone(mode,newmodes);
for(i=0;i<nclistlength(newmodes);i++) {
const char* mode = nclistget(newmodes,i);
negateone(mode,newmodes);
}
/* Store new mode value */
@ -541,11 +562,13 @@ processinferences(NClist* fraglenv)
done:
nullfree(newmodeval);
nclistfreeall(modes);
nclistfreeall(newmodes);
nclistfreeall(currentmodes);
nclistfreeall(nextmodes);
return check(stat);
}
static int
negateone(const char* mode, NClist* newmodes)
{
@ -568,23 +591,28 @@ negateone(const char* mode, NClist* newmodes)
return changed;
}
static int
inferone(const char* mode, NClist* newmodes)
static void
infernext(NClist* current, NClist* next)
{
const struct MODEINFER* tests = modeinferences;
int changed = 0;
for(;tests->key;tests++) {
if(strcasecmp(tests->key,mode)==0) {
/* Append the inferred mode; dups removed later */
nclistpush(newmodes,strdup(tests->inference));
changed = 1;
int i;
for(i=0;i<nclistlength(current);i++) {
const struct MODEINFER* tests = NULL;
const char* cur = nclistget(current,i);
for(tests=modeinferences;tests->key;tests++) {
if(strcasecmp(tests->key,cur)==0) {
/* Append the inferred mode unless dup */
if(!nclistmatch(next,tests->inference,1))
nclistpush(next,strdup(tests->inference));
}
}
}
return changed;
}
/*
Given a list of strings, remove nulls and duplicates
*/
static int
mergekey(NClist** valuesp)
mergelist(NClist** valuesp)
{
int i,j;
int stat = NC_NOERR;
@ -686,12 +714,12 @@ cleanfragments(NClist** fraglenvp)
/* collect all unique keys */
collectallkeys(fraglenv,allkeys);
/* Collect all values for same key across all fragments */
/* Collect all values for same key across all fragment pairs */
for(i=0;i<nclistlength(allkeys);i++) {
key = nclistget(allkeys,i);
collectvaluesbykey(fraglenv,key,tmp);
/* merge the key values, remove duplicate */
if((stat=mergekey(&tmp))) goto done;
if((stat=mergelist(&tmp))) goto done;
/* Construct key,value pair and insert into newlist */
key = strdup(key);
nclistpush(newlist,key);
@ -923,7 +951,7 @@ NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void
}
} else {/* Not URL */
if(*newpathp) *newpathp = NULL;
if(newpathp) *newpathp = NULL;
}
/* Phase 8: mode inference from mode flags */
@ -1101,6 +1129,71 @@ parsemode(const char* modeval)
return modes;
}
/* Convert a list into a comma'd string */
static char*
list2string(NClist* list)
{
int i;
NCbytes* buf = NULL;
char* result = NULL;
if(list == NULL || nclistlength(list)==0) return strdup("");
buf = ncbytesnew();
for(i=0;i<nclistlength(list);i++) {
const char* m = nclistget(list,i);
if(m == NULL || strlen(m) == 0) continue;
if(i > 0) ncbytescat(buf,",");
ncbytescat(buf,m);
}
result = ncbytesextract(buf);
ncbytesfree(buf);
if(result == NULL) result = strdup("");
return result;
}
#if 0
/* Given a comma separated string, remove duplicates; mostly used to cleanup mode list */
static char*
cleancommalist(const char* commalist, int caseinsensitive)
{
NClist* tmp = nclistnew();
char* newlist = NULL;
if(commalist == NULL || strlen(commalist)==0) return nulldup(commalist);
(void)parseonchar(commalist,',',tmp);/* split on commas */
cleanstringlist(tmp,caseinsensitive);
newlist = list2string(tmp);
nclistfreeall(tmp);
return newlist;
}
#endif
/* Given a list of strings, remove nulls and duplicated */
static void
cleanstringlist(NClist* strs, int caseinsensitive)
{
int i,j;
if(nclistlength(strs) == 0) return;
/* Remove nulls */
for(i=nclistlength(strs)-1;i>=0;i--) {
if(nclistget(strs,i)==NULL) nclistremove(strs,i);
}
/* Remove duplicates*/
for(i=0;i<nclistlength(strs);i++) {
const char* value = nclistget(strs,i);
/* look ahead for duplicates */
for(j=nclistlength(strs)-1;j>i;j--) {
int match;
const char* candidate = nclistget(strs,j);
if(caseinsensitive)
match = (strcasecmp(value,candidate) == 0);
else
match = (strcmp(value,candidate) == 0);
if(match) {char* dup = nclistremove(strs,j); nullfree(dup);}
}
}
}
/**************************************************/
/**
* @internal Given an existing file, figure out its format and return
@ -1502,8 +1595,10 @@ printlist(NClist* list, const char* tag)
{
int i;
fprintf(stderr,"%s:",tag);
for(i=0;i<nclistlength(list);i++)
for(i=0;i<nclistlength(list);i++) {
fprintf(stderr," %s",(char*)nclistget(list,i));
fprintf(stderr,"[%p]",(char*)nclistget(list,i));
}
fprintf(stderr,"\n");
dbgflush();
}

View File

@ -122,9 +122,7 @@ ncbytesappend(NCbytes* bb, char elem)
int
ncbytescat(NCbytes* bb, const char* s)
{
if(s == NULL) {
return 1;
}
if(s == NULL) return 1;
ncbytesappendn(bb,(void*)s,strlen(s)+1); /* include trailing null*/
/* back up over the trailing null*/
if(bb->length == 0) return ncbytesfail();

View File

@ -183,6 +183,7 @@ nclistremove(NClist* l, size_t i)
return elem;
}
/* Match on == */
int
nclistcontains(NClist* l, void* elem)
{
@ -193,7 +194,7 @@ nclistcontains(NClist* l, void* elem)
return 0;
}
/* Return 1/0 */
/* Match on str(case)cmp */
int
nclistmatch(NClist* l, const char* elem, int casesensitive)
{
@ -230,7 +231,6 @@ nclistelemremove(NClist* l, void* elem)
return found;
}
/* Extends nclist to include a unique operator
which remove duplicate values; NULL values removed
return value is always 1.

View File

@ -1429,6 +1429,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
char* varpath = NULL;
char* key = NULL;
NCZ_FILE_INFO_T* zinfo = NULL;
NC_VAR_INFO_T* var = NULL;
NCZ_VAR_INFO_T* zvar = NULL;
NCZMAP* map = NULL;
NCjson* jvar = NULL;
@ -1460,7 +1461,6 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
/* Load each var in turn */
for(i = 0; i < nclistlength(varnames); i++) {
NC_VAR_INFO_T* var;
const char* varname = nclistget(varnames,i);
if((stat = nc4_var_list_add2(grp, varname, &var)))
goto done;
@ -1477,10 +1477,6 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
/* Indicate we do not have quantizer yet */
var->quantize_mode = -1;
/* Set filter list */
assert(var->filters == NULL);
var->filters = (void*)nclistnew();
/* Construct var path */
if((stat = NCZ_varkey(var,&varpath)))
goto done;
@ -1697,9 +1693,9 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
object MUST contain a "id" key identifying the codec to be used. */
/* Do filters key before compressor key so final filter chain is in correct order */
{
#ifdef ENABLE_NCZARR_FILTERS
if(var->filters == NULL) var->filters = (void*)nclistnew();
if(zvar->incompletefilters == NULL) zvar->incompletefilters = (void*)nclistnew();
#ifdef ENABLE_NCZARR_FILTERS
{ int k;
chainindex = 0; /* track location of filter in the chain */
if((stat = NCZ_filter_initialize())) goto done;
@ -1722,8 +1718,8 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
/* From V2 Spec: A JSON object identifying the primary compression codec and providing
configuration parameters, or ``null`` if no compressor is to be used. */
{
if(var->filters == NULL) var->filters = (void*)nclistnew();
#ifdef ENABLE_NCZARR_FILTERS
if(var->filters == NULL) var->filters = (void*)nclistnew();
if((stat = NCZ_filter_initialize())) goto done;
if((stat = NCJdictget(jvar,"compressor",&jfilter))) goto done;
if(jfilter != NULL && NCJsort(jfilter) != NCJ_NULL) {
@ -1752,6 +1748,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
nullfree(shapes); shapes = NULL;
if(formatv1) {NCJreclaim(jncvar); jncvar = NULL;}
NCJreclaim(jvar); jvar = NULL;
var = NULL;
}
done:

View File

@ -391,9 +391,11 @@ NCZ_def_var(int ncid, const char *name, nc_type xtype, int ndims,
var->meta_read = NC_TRUE;
var->atts_read = NC_TRUE;
#ifdef ENABLE_NCZARR_FILTERS
/* Set the filter list */
assert(var->filters == NULL);
var->filters = (void*)nclistnew();
#endif
/* Point to the type, and increment its ref. count */
var->type_info = type;
@ -558,10 +560,12 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1,
/* Can't turn on parallel and deflate/fletcher32/szip/shuffle
* before HDF5 1.10.3. */
#ifdef ENABLE_NCZARR_FILTERS
#ifndef HDF5_SUPPORTS_PAR_FILTERS
if (h5->parallel == NC_TRUE)
if (nclistlength(((NClist*)var->filters)) > 0 || fletcher32 || shuffle)
{retval = NC_EINVAL; goto done;}
#endif
#endif
/* If the HDF5 dataset has already been created, then it is too
@ -628,8 +632,10 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1,
* no filters in use for this data. */
if (storage != NC_CHUNKED)
{
#ifdef NCZARR_FILTERS
if (nclistlength(((NClist*)var->filters)) > 0)
{retval = NC_EINVAL; goto done;}
#endif
for (d = 0; d < var->ndims; d++)
if (var->dim[d]->unlimited)
{retval = NC_EINVAL; goto done;}