Regularize the scoping of types

re: Github issue https://github.com/Unidata/netcdf-c/issues/1956

The function NC_compare_nc_types in libdispatch/dcopy.c uses an
incorrect algorithm to search for types. The core of this is the
function NC_rec_find_nc_type in libdispatch/dcopy.c. Currently
it searchs the current group and its subtree.

Additionally, the function NC4_inq_typeid in libsrc4/nc4internal.c
has been extended to handle fully qualified names. It was originally
designed to do this, but for some reason never completed.

The NC_rec_find_nc_type algorithm has been altered to match the
algorithm used by NC4_inq_typeid. It operates as follows.

Given a file F, group G and a type T. It searches file F2, group
G2, for another type T2 that is equivalent to T.

The search order is as follows.
1. Search G2 for a type T2 equivalent to T.
2. Search upwards in the ancestor groups of G2 for a type T2 equivalent to T.
3. Search the complete group tree of F2 in pre-order, breadth-first order to locate T2 equivalent to T.

Also add a test case to validate algorithm: ncdump/test_scope.sh.

Note, this change may cause compatibility problems, though it is
unlikely because two different equivalent type declarations in
one dataset is unlikely.
This commit is contained in:
Dennis Heimbigner 2021-03-06 14:09:37 -07:00
parent f388bcec15
commit 0428c38b1e
12 changed files with 441 additions and 77 deletions

View File

@ -4,7 +4,7 @@
name: Run netCDF Tests
on: [pull_request]
on: [pull_request,push]
jobs:

View File

@ -16,6 +16,9 @@
#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#ifdef HAVE_STDINT_H
#include <stdint.h>
#endif
/*
This is included in bottom

View File

@ -10,8 +10,13 @@
#include "config.h"
#include "ncdispatch.h"
#include "nc_logging.h"
#include "nclist.h"
#ifdef USE_NETCDF4
static int searchgroup(int ncid1, int tid1, int grp, int* tid2);
static int searchgrouptree(int ncid1, int tid1, int grp, int* tid2);
/**
* @internal Compare two netcdf types for equality. Must have the
* ncids as well, to find user-defined types.
@ -27,8 +32,7 @@
* @author Ed Hartnett
*/
static int
NC_compare_nc_types(int ncid1, int typeid1, int ncid2, int typeid2,
int *equalp)
NC_compare_nc_types(int ncid1, int typeid1, int ncid2, int typeid2, int *equalp)
{
int ret = NC_NOERR;
@ -152,8 +156,15 @@ NC_compare_nc_types(int ncid1, int typeid1, int ncid2, int typeid2,
}
/**
* @internal Recursively hunt for a netCDF type id. (Code from
* nc4internal.c); Return matching typeid or 0 if not found.
* @internal Recursively hunt for a netCDF type id, tid2, that is "equal" to tid1.
* Question is: what search order do we use? Ncgen uses root group tree in pre-order.
* But NC4_inq_typeid uses these rules:
* 1. ncid2
* 2. parents of ncid2 (up the tree to root)
* 3. root group tree in pre-order.
* We will leave ncgen for another day and use the nc_inq_typeid rule.
*
* Return matching typeid or 0 if not found.
*
* @param ncid1 File ID.
* @param tid1 Type ID.
@ -161,68 +172,35 @@ NC_compare_nc_types(int ncid1, int typeid1, int ncid2, int typeid2,
* @param tid2 Pointer that gets type ID of equal type.
*
* @return ::NC_NOERR No error.
* @author Ed Hartnett
* @author Ed Hartnett, Dennis Heimbigner
*/
static int
NC_rec_find_nc_type(int ncid1, nc_type tid1, int ncid2, nc_type* tid2)
{
int i,ret = NC_NOERR;
int nids;
int* ids = NULL;
int ret = NC_NOERR;
int parent;
/* Get all types in grp ncid2 */
if(tid2)
*tid2 = 0;
if ((ret = nc_inq_typeids(ncid2, &nids, NULL)))
return ret;
if (nids)
{
if (!(ids = (int *)malloc((size_t)nids * sizeof(int))))
return NC_ENOMEM;
if ((ret = nc_inq_typeids(ncid2, &nids, ids)))
return ret;
for(i = 0; i < nids; i++)
{
int equal = 0;
if ((ret = NC_compare_nc_types(ncid1, tid1, ncid2, ids[i], &equal)))
return ret;
if(equal)
{
if(tid2)
*tid2 = ids[i];
free(ids);
return NC_NOERR;
}
}
free(ids);
if((ret = searchgroup(ncid1,tid1,ncid2,tid2)))
goto done;
if(*tid2 != 0)
goto done; /* found */
/* Look in the parents of ncid2 upto the root */
switch (ret = nc_inq_grp_parent(ncid2,&parent)) {
case NC_NOERR:
/* Recurse up using parent grp */
ret = NC_rec_find_nc_type(ncid1, tid1, parent, tid2);
break;
case NC_ENOGRP:
/* do the breadth-first pre-order search of the whole tree */
/* ncid2 should be root group */
ret = searchgrouptree(ncid1,tid1,ncid2,tid2);
break;
default: break;
}
/* recurse */
if ((ret = nc_inq_grps(ncid1, &nids, NULL)))
return ret;
if (nids)
{
if (!(ids = (int *)malloc((size_t)nids * sizeof(int))))
return NC_ENOMEM;
if ((ret = nc_inq_grps(ncid1, &nids, ids)))
{
free(ids);
return ret;
}
for (i = 0; i < nids; i++)
{
ret = NC_rec_find_nc_type(ncid1, tid1, ids[i], tid2);
if (ret && ret != NC_EBADTYPE)
break;
if (tid2 && *tid2 != 0) /* found */
{
free(ids);
return NC_NOERR;
}
}
free(ids);
}
return NC_EBADTYPE; /* not found */
done:
return ret;
}
/**
@ -711,3 +689,92 @@ nc_copy_att(int ncid_in, int varid_in, const char *name,
return NC_NOERR;
}
#ifdef USE_NETCDF4
/* Helper function for NC_rec_find_nc_type();
search a specified group for matching type.
*/
static int
searchgroup(int ncid1, int tid1, int grp, int* tid2)
{
int i,ret = NC_NOERR;
int nids;
int* ids = NULL;
/* Get all types in grp */
if(tid2)
*tid2 = 0;
if ((ret = nc_inq_typeids(grp, &nids, NULL)))
goto done;
if (nids)
{
if (!(ids = (int *)malloc((size_t)nids * sizeof(int))))
{ret = NC_ENOMEM; goto done;}
if ((ret = nc_inq_typeids(grp, &nids, ids)))
goto done;
for(i = 0; i < nids; i++)
{
int equal = 0;
if ((ret = NC_compare_nc_types(ncid1, tid1, grp, ids[i], &equal)))
goto done;
if(equal)
{
if(tid2)
*tid2 = ids[i];
goto done;
}
}
}
done:
nullfree(ids);
return ret;
}
/* Helper function for NC_rec_find_nc_type();
search a tree of groups for a matching type
using a breadth first queue
*/
static int
searchgrouptree(int ncid1, int tid1, int grp, int* tid2)
{
int i,ret = NC_NOERR;
int nids;
int* ids = NULL;
NClist* queue = nclistnew();
int gid;
uintptr_t id;
id = grp;
nclistpush(queue,(void*)id); /* prime the queue */
while(nclistlength(queue) > 0) {
id = (uintptr_t)nclistremove(queue,0);
gid = (int)id;
if((ret = searchgroup(ncid1,tid1,gid,tid2)))
goto done;
if(*tid2 != 0)
goto done; /*we found it*/
/* Get subgroups of gid and push onto front of the queue (for breadth first) */
if((ret = nc_inq_grps(gid,&nids,NULL)))
goto done;
if (!(ids = (int *)malloc((size_t)nids * sizeof(int))))
{ret = NC_ENOMEM; goto done;}
if ((ret = nc_inq_grps(gid, &nids, ids)))
goto done;
/* push onto the end of the queue */
for(i=0;i<nids;i++) {
id = ids[i];
nclistpush(queue,(void*)id);
}
}
/* Not found */
ret = NC_EBADTYPE;
done:
nclistfree(queue);
nullfree(ids);
return ret;
}
#endif

View File

@ -554,8 +554,8 @@ NC4_inq_typeid(int ncid, const char *name, nc_type *typeidp)
NC_GRP_INFO_T *grptwo;
NC_FILE_INFO_T *h5;
NC_TYPE_INFO_T *type = NULL;
char *norm_name;
int i, retval;
char *norm_name = NULL;
int i, retval = NC_NOERR;
/* Handle atomic types. */
for (i = 0; i < NUM_ATOMIC_TYPES; i++)
@ -563,27 +563,47 @@ NC4_inq_typeid(int ncid, const char *name, nc_type *typeidp)
{
if (typeidp)
*typeidp = i;
return NC_NOERR;
goto done;
}
/* Find info for this file and group, and set pointer to each. */
if ((retval = nc4_find_grp_h5(ncid, &grp, &h5)))
return retval;
goto done;
assert(h5 && grp);
/* If the first char is a /, this is a fully-qualified
* name. Otherwise, this had better be a local name (i.e. no / in
* the middle). */
if (name[0] != '/' && strstr(name, "/"))
return NC_EINVAL;
{retval = NC_EINVAL; goto done;}
/* Normalize name. */
if (!(norm_name = (char*)malloc(strlen(name) + 1)))
return NC_ENOMEM;
if ((retval = nc4_normalize_name(name, norm_name))) {
free(norm_name);
return retval;
{retval = NC_ENOMEM; goto done;}
if ((retval = nc4_normalize_name(name, norm_name)))
goto done;
/* If this is a fqn, then walk the sequence of parent groups to the last group
and see if that group has a type of the right name */
if(name[0] == '/') { /* FQN */
int rootncid = (grp->nc4_info->root_grp->hdr.id | grp->nc4_info->controller->ext_ncid);
int parent = 0;
char* lastname = strrchr(norm_name,'/'); /* break off the last segment: the type name */
if(lastname == norm_name)
{retval = NC_EINVAL; goto done;}
*lastname++ = '\0'; /* break off the lastsegment */
if((retval = NC4_inq_grp_full_ncid(rootncid,norm_name,&parent)))
goto done;
/* Get parent info */
if((retval=nc4_find_nc4_grp(parent,&grp)))
goto done;
/* See if type exists in this group */
type = (NC_TYPE_INFO_T*)ncindexlookup(grp->type,lastname);
if(type == NULL)
{retval = NC_EBADTYPE; goto done;}
goto done;
}
/* Is the type in this group? If not, search parents. */
for (grptwo = grp; grptwo; grptwo = grptwo->parent) {
type = (NC_TYPE_INFO_T*)ncindexlookup(grptwo->type,norm_name);
@ -602,13 +622,13 @@ NC4_inq_typeid(int ncid, const char *name, nc_type *typeidp)
if (typeidp)
*typeidp = type->hdr.id;
free(norm_name);
/* OK, I give up already! */
if (!type)
return NC_EBADTYPE;
{retval = NC_EBADTYPE; goto done;}
return NC_NOERR;
done:
nullfree(norm_name);
return retval;
}
/**

View File

@ -13,6 +13,7 @@ SET(ncdump_FILES ncdump.c vardata.c dumplib.c indent.c nctime0.c utils.c nciter.
SET(nccopy_FILES nccopy.c nciter.c chunkspec.c utils.c dimmap.c list.c)
SET(ocprint_FILES ocprint.c)
SET(ncvalidator_FILES ncvalidator.c)
SET(printfqn_FILES printfqn.c)
IF(USE_X_GETOPT)
SET(ncdump_FILES ${ncdump_FILES} XGetopt.c)
@ -27,6 +28,7 @@ ADD_EXECUTABLE(ncvalidator ${ncvalidator_FILES})
IF(USE_HDF5)
ADD_EXECUTABLE(nc4print nc4print.c nc4printer.c)
ADD_EXECUTABLE(printfqn ${printfqn_FILES})
ENDIF(USE_HDF5)
IF(ENABLE_DAP)
@ -39,6 +41,7 @@ TARGET_LINK_LIBRARIES(ncvalidator netcdf ${ALL_TLL_LIBS})
IF(USE_HDF5)
TARGET_LINK_LIBRARIES(nc4print netcdf ${ALL_TLL_LIBS})
TARGET_LINK_LIBRARIES(printfqn netcdf ${ALL_TLL_LIBS})
ENDIF(USE_HDF5)
IF(ENABLE_DAP)
@ -81,6 +84,15 @@ IF(MSVC)
${CMAKE_CURRENT_BINARY_DIR})
SET_TARGET_PROPERTIES(ncvalidator PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE
${CMAKE_CURRENT_BINARY_DIR})
SET_TARGET_PROPERTIES(printfqn PROPERTIES RUNTIME_OUTPUT_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR})
SET_TARGET_PROPERTIES(printfqn PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG
${CMAKE_CURRENT_BINARY_DIR})
SET_TARGET_PROPERTIES(printfqn PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE
${CMAKE_CURRENT_BINARY_DIR})
ENDIF(USE_HDF5)
IF(ENABLE_DAP)
@ -295,6 +307,10 @@ ENDIF(MSVC)
add_sh_test(ncdump test_keywords)
ENDIF()
IF(USE_CDF5)
add_sh_test(ncdump test_scope)
ENDIF()
ENDIF()
ENDIF()

View File

@ -46,6 +46,13 @@ if USE_HDF5
bin_PROGRAMS += nc4print
noinst_PROGRAMS += nc4print
nc4print_SOURCES = nc4print.c nc4printer.c
# Create a helper program for tst_scope.sh
# Program prints out the fqn of the type of
# a specified variable in the .nc file.
noinst_PROGRAMS += printfqn
printfqn_SOURCES = printfqn.c
endif
# Conditionally build the ocprint program, but do not install
@ -99,7 +106,7 @@ TESTS += tst_fileinfo.sh tst_hdf5_offset.sh tst_inttags4.sh \
tst_netcdf4.sh tst_fillbug.sh tst_netcdf4_4.sh tst_nccopy4.sh \
tst_nccopy5.sh tst_grp_spec.sh tst_mud.sh tst_h_scalar.sh tst_formatx4.sh \
run_utf8_nc4_tests.sh run_back_comp_tests.sh run_ncgen_nc4_tests.sh \
tst_ncgen4.sh
tst_ncgen4.sh test_scope.sh
# Record interscript dependencies so parallel builds work.
tst_nccopy4.log: run_ncgen_tests.log tst_output.log tst_ncgen4.log \
@ -162,7 +169,9 @@ ref_provenance_v1.nc ref_tst_radix.cdl tst_radix.cdl test_radix.sh \
ref_nccopy_w.cdl tst_nccopy_w3.sh tst_nccopy_w4.sh \
ref_no_ncproperty.nc test_unicode_directory.sh \
ref_roman_szip_simple.cdl ref_roman_szip_unlim.cdl ref_tst_perdimspecs.cdl \
test_keywords.sh ref_keyword1.cdl ref_keyword2.cdl ref_tst_nofilters.cdl
test_keywords.sh ref_keyword1.cdl ref_keyword2.cdl ref_tst_nofilters.cdl \
test_scope.sh type_group_only.cdl type_ancestor_only.cdl \
type_ancestor_subgroup.cdl type_preorder.cdl
# The L512.bin file is file containing exactly 512 bytes each of value 0.
# It is used for creating hdf5 files with varying offsets for testing.
@ -198,5 +207,6 @@ tst_ncf199.cdl tst_tst_gattenum.cdl tst_tst_usuffix.cdl ctest.c \
ctest64.c nccopy3_subset_out.nc camrun.c tst_ncf213.cdl tst_ncf213.nc \
tst_radix.nc tmp_radix.cdl ctest_small_3.c ctest_small_4.c \
ctest_special_atts_4.c tst_roman_szip_simple.cdl \
tst_roman_szip_unlim.cdl tst_perdimpspecs.nc tmppds.* \
keyword1.nc keyword2.nc tmp_keyword1.cdl tmp_keyword2.cdl
tst_roman_szip_unlim.cdl tst_perdimpspecs.nc tmppds.* \
keyword1.nc keyword2.nc tmp_keyword1.cdl tmp_keyword2.cdl \
type_*.nc copy_type_*.cdl

143
ncdump/printfqn.c Normal file
View File

@ -0,0 +1,143 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <netcdf.h>
#define CHECK(err) {if(err) report(err,__LINE__);}
static void
report(int err, int lineno)
{
fprintf(stderr,"Error: %d: %s\n", lineno, nc_strerror(err));
exit(1);
}
void
usage(void)
{
fprintf(stderr,"usage: printfqn <filename> <varname>\n");
exit(0);
}
int
get_type_parent(int ncid, int tid, int* parentp)
{
int stat = NC_NOERR;
int i;
int nids;
int ids[4096];
/* Does this group have the type we are searching for? */
if((stat=nc_inq_typeids(ncid,&nids,ids))) goto done;
assert(nids < 4096);
/* Search for this typeid */
for(i=0;i<nids;i++) {
if(ids[i] == tid) {
if(parentp) *parentp = ncid;
goto done;
}
}
/* Else Search subgroups. */
if((stat=nc_inq_grps(ncid,&nids,ids))) goto done;
assert(nids < 4096);
/* Recurse on each subgroup */
for(i=0;i<nids;i++) {
switch (stat = get_type_parent(ids[i],tid,parentp)) {
case NC_EBADTYPE: break; /* not found; keep looking */
case NC_NOERR: goto done; /* found it */
default: goto done; /* some other error */
}
}
stat = NC_EBADTYPE; /* Not found */
done:
return stat;
}
int
get_variable_info(int ncid, const char* name, int* gidp, int* vidp)
{
int stat = NC_NOERR;
int i;
int nids;
int ids[4096];
char varname[NC_MAX_NAME];
/* Assume only one occurrence of the variable in dataset */
/* Does this group have the variable we are searching for? */
if((stat=nc_inq_varids(ncid,&nids,ids))) goto done;
assert(nids < 4096);
/* Search for this variable name */
for(i=0;i<nids;i++) {
if((stat = nc_inq_varname(ncid,ids[i],varname))) goto done;
if(strcmp(name,varname)==0) {
if(gidp) *gidp = ncid;
if(vidp) *vidp = ids[i];
goto done;
}
}
/* Else Search subgroups. */
if((stat=nc_inq_grps(ncid,&nids,ids))) goto done;
assert(nids < 4096);
/* Recurse on each subgroup */
for(i=0;i<nids;i++) {
switch (stat = get_variable_info(ids[i],name,gidp,vidp)) {
case NC_ENOTVAR: break; /* not found; keep looking */
case NC_NOERR: goto done; /* found it */
default: goto done; /* some other error */
}
}
stat = NC_ENOTVAR; /* Not found */
done:
return stat;
}
int
main(int argc, char** argv)
{
int ncid, varid, gid, tid;
const char* filename = NULL;
char varname[NC_MAX_NAME];
size_t fqnlen, namelen;
char fqn[4096];
char name[NC_MAX_NAME];
if(argc < 3) usage();
filename = argv[1];
strcpy(varname,argv[2]);
CHECK(nc_open(filename,NC_NETCDF4,&ncid));
/* Locate the parent group for the variable */
CHECK(get_variable_info(ncid,varname,&gid,&varid));
/* Get the type id of the variable */
CHECK(nc_inq_vartype(gid,varid,&tid));
/* Get the type name */
CHECK(nc_inq_type(ncid,tid,name,&namelen));
/* Get the containing group id for the type */
CHECK(get_type_parent(ncid,tid,&gid));
/* Get the FQN name for the containing type */
CHECK(nc_inq_grpname_full(gid,&fqnlen,fqn));
assert(fqnlen < sizeof(fqn));
if(strcmp(fqn,"/")==0) fqn[0] = '\0';
/* report result with no newline */
printf("|%s/%s|",fqn,name);
return 0;
}

46
ncdump/test_scope.sh Executable file
View File

@ -0,0 +1,46 @@
#!/bin/bash
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh
set -x
set -e
# Test cases
# group_only - type defined in same group as var/attr
# ancestor_only - type defined in some ancestor group of var/attr
# ancestor_subgroup - type defined in both ancestor group and subgroup
# preorder - type defined in some preceding, non ancestor group
TSTS="type_group_only type_ancestor_only type_ancestor_subgroup type_preorder"
SETUP=1
setup() {
${NCGEN} -4 -lb ${srcdir}/$1.cdl
}
testscope() {
${NCCOPY} ${execdir}/$1.nc ${execdir}/$1_copy.nc
${NCDUMP} -h -n $1 ${execdir}/$1_copy.nc > copy_$1.cdl
diff -wB ${srcdir}/$1.cdl ${execdir}/copy_$1.cdl
REFT=`${execdir}/printfqn ${execdir}/$1.nc test_variable`
COPYT=`${execdir}/printfqn ${execdir}/$1_copy.nc test_variable`
if test "x$REFT" != "x$COPYT" ; then
echo "***Fail: ref=${REFT} copy=${COPYT}"
fi
}
if test "x$SETUP" = x1 ; then
for t in $TSTS ; do
setup $t
done
fi
for t in $TSTS ; do
testscope $t
done
exit 0

View File

@ -0,0 +1,21 @@
netcdf type_ancestor_only {
types:
ubyte enum test_enum_type {OPTION1 = 0, OPTION2 = 1, OPTION3 = 2} ;
group: test_group {
variables:
test_enum_type test_variable ;
test_enum_type test_variable:_FillValue = OPTION1 ;
group: test_subgroup1 {
group: test_subsubgroup {
} // group test_subsubgroup
} // group test_subgroup1
group: test_subgroup2 {
} // group test_subgroup2
} // group test_group
}

View File

@ -0,0 +1,15 @@
netcdf type_ancestor_subgroup {
types:
ubyte enum test_enum_type {OPTION1 = 0, OPTION2 = 1, OPTION3 = 2} ;
group: test_group {
variables:
test_enum_type test_variable ;
test_enum_type test_variable:_FillValue = OPTION1 ;
group: sub_group {
types:
ubyte enum test_enum_type {OPTION1 = 0, OPTION2 = 1, OPTION3 = 2} ;
} // group sub_group
} // group test_group
}

View File

@ -0,0 +1,10 @@
netcdf type_group_only {
group: test_group {
types:
ubyte enum test_enum_type {OPTION1 = 0, OPTION2 = 1, OPTION3 = 2} ;
variables:
test_enum_type test_variable ;
test_enum_type test_variable:_FillValue = OPTION1 ;
} // group test_group
}

13
ncdump/type_preorder.cdl Normal file
View File

@ -0,0 +1,13 @@
netcdf type_preorder {
group: preorder {
types:
ubyte enum test_enum_type {OPTION1 = 0, OPTION2 = 1, OPTION3 = 2} ;
} // group preorder
group: test_group {
variables:
/preorder/test_enum_type test_variable ;
/preorder/test_enum_type test_variable:_FillValue = OPTION1 ;
} // group test_group
}