Fix JSON quoted string processing in libnczarr

re: github issue https://github.com/Unidata/netcdf-c/issues/1982

The problem was that the libnczarr/zsjon.c handling of strings with
embedded double quotes was wrong; a one line fix.
Also added a test case.

Misc. other changes:

1. I Discovered, en passant, that the handling of 64 bit constants
had an error that was fixed.
2. cleanup of the constant conversion code to recurse on arrays of values.
This commit is contained in:
Dennis Heimbigner 2021-05-06 16:39:44 -06:00
parent e6bdee5930
commit 91168e33a0
15 changed files with 128 additions and 71 deletions

1
.gitignore vendored
View File

@ -40,7 +40,6 @@ CMakeLists.txt.user
scan-build
.deps
.libs
*.zip
Makefile
.DS_Store
build-par

View File

@ -490,7 +490,6 @@ movetor(NCDAPCOMMON* nccomm,
CDFnode* xnode = (CDFnode*)nclistget(path,depth);
OCdatanode reccontent = NULL;
OCdatanode dimcontent = NULL;
OCdatanode fieldcontent = NULL;
Dapodometer* odom = NULL;
int hasstringdim = 0;
DCEsegment* segment;
@ -605,7 +604,6 @@ fprintf(stderr," segment=%s hasstringdim=%d\n",
done:
oc_data_free(conn,dimcontent);
oc_data_free(conn,fieldcontent);
oc_data_free(conn,reccontent);
if(ocstat != OC_NOERR) ncstat = ocerrtoncerr(ocstat);
if(odom) dapodom_free(odom);
@ -627,8 +625,6 @@ movetofield(NCDAPCOMMON* nccomm,
size_t fieldindex,gridindex;
OClink conn = nccomm->oc.conn;
CDFnode* xnode = (CDFnode*)nclistget(path,depth);
OCdatanode reccontent = NULL;
OCdatanode dimcontent = NULL;
OCdatanode fieldcontent = NULL;
CDFnode* xnext;
int newdepth;
@ -675,9 +671,7 @@ movetofield(NCDAPCOMMON* nccomm,
segments);
done:
oc_data_free(conn,dimcontent);
oc_data_free(conn,fieldcontent);
oc_data_free(conn,reccontent);
if(ocstat != OC_NOERR) ncstat = ocerrtoncerr(ocstat);
return THROW(ncstat);
}

View File

@ -1626,7 +1626,6 @@ getseqdimsize(NCDAPCOMMON* dapcomm, CDFnode* seq, size_t* sizep)
NCerror ncstat = NC_NOERR;
OCerror ocstat = OC_NOERR;
OClink conn = dapcomm->oc.conn;
OCdatanode rootcontent = NULL;
OCddsnode ocroot;
CDFnode* dxdroot;
CDFnode* xseq;
@ -1685,7 +1684,6 @@ fprintf(stderr,"sequencesize: %s = %lu\n",seq->ocname,(unsigned long)seqsize);
fail:
ncbytesfree(seqcountconstraints);
oc_data_free(conn,rootcontent);
if(ocstat != OC_NOERR) ncstat = ocerrtoncerr(ocstat);
return ncstat;
}

View File

@ -29,8 +29,8 @@ extern int ncz_unload_jatts(NCZ_FILE_INFO_T*, NC_OBJ* container, NCjson* jattrs,
extern int ncz_close_file(NC_FILE_INFO_T* file, int abort);
/* zcvt.c */
extern int NCZ_convert1(NCjson* jsrc, nc_type, char* memory0);
extern int NCZ_stringconvert1(nc_type typid, char* src, char** strp);
extern int NCZ_convert1(NCjson* jsrc, nc_type, unsigned char* memory0);
extern int NCZ_stringconvert1(nc_type typid, unsigned char* src, char** strp);
extern int NCZ_stringconvert(nc_type typid, size_t len, void* data0, NCjson** jdatap);
/* zsync.c */

View File

@ -23,7 +23,7 @@ struct ZCVT {
};
int
NCZ_convert1(NCjson* jsrc, nc_type dsttype, char* memory)
NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory)
{
int stat = NC_NOERR;
nc_type srctype;
@ -234,7 +234,7 @@ done:
}
int
NCZ_stringconvert1(nc_type srctype, char* src, char** strp)
NCZ_stringconvert1(nc_type srctype, unsigned char* src, char** strp)
{
int stat = NC_NOERR;
struct ZCVT zcvt;

View File

@ -318,7 +318,7 @@ NCJlex(NCJparser* parser)
start = parser->pos;
for(;;) {
c = *parser->pos++;
if(c == NCJ_ESCAPE) c++;
if(c == NCJ_ESCAPE) parser->pos++;
else if(c == NCJ_QUOTE || c == '\0') break;
}
if(c == '\0') {

View File

@ -30,7 +30,7 @@ static int parsedimrefs(NC_FILE_INFO_T*, NClist* dimnames, size64_t* shape, NC_
static int decodeints(NCjson* jshape, size64_t* shapes);
static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* lenp, void** datap);
static int inferattrtype(NCjson* values, nc_type* typeidp);
static int mininttype(unsigned long long u64);
static int mininttype(unsigned long long u64, int negative);
static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims);
/**************************************************/
@ -825,7 +825,7 @@ zconvert(nc_type typeid, size_t typelen, void* dst0, NCjson* src)
int stat = NC_NOERR;
int i;
size_t len;
char* dst = dst0; /* Work in char* space so we can do pointer arithmetic */
unsigned char* dst = dst0; /* Work in char* space so we can do pointer arithmetic */
switch (src->sort) {
case NCJ_ARRAY:
@ -902,6 +902,7 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* lenp, void** datap)
void* data = NULL;
size_t typelen;
nc_type typeid = NC_NAT;
int reclaimvalues = 0;
/* Get assumed type */
if(typeidp) typeid = *typeidp;
@ -941,56 +942,42 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* lenp, void** datap)
if(typeidp) *typeidp = typeid; /* return possibly inferred type */
done:
if(reclaimvalues) NCJreclaim(values); /* we created it */
nullfree(data);
return THROW(stat);
}
static int
inferattrtype(NCjson* values, nc_type* typeidp)
inferattrtype(NCjson* value, nc_type* typeidp)
{
nc_type typeid;
NCjson* j = NULL;
unsigned long long u64;
long long i64;
int negative = 0;
if(NCJlength(values) == 0) return NC_EINVAL;
switch (values->sort) {
case NCJ_ARRAY:
/* use the first value to decide */
if(NCJarrayith(values,0,&j)) return NC_EINVAL;
switch(j->sort) {
case NCJ_INT:
if(j->value[0] == '-') {
sscanf(j->value,"%lld",&i64);
u64 = (unsigned long long)i64;
} else
sscanf(j->value,"%llu",&u64);
typeid = mininttype(u64);
break;
case NCJ_DOUBLE:
typeid = NC_DOUBLE;
break;
case NCJ_BOOLEAN:
typeid = NC_UBYTE;
break;
default: return NC_EINTERNAL;
}
break;
/* Might be a singleton */
if(NCJlength(value) == 0) return NC_EINVAL;
if(value->sort == NCJ_ARRAY) {
if(NCJarrayith(value,0,&j)) return NC_EINVAL;
return inferattrtype(j,typeidp);
}
if(value->value)
negative = (value->value[0] == '-');
switch (value->sort) {
case NCJ_INT:
if(values->value[0] == '-') {
sscanf(values->value,"%lld",&i64);
u64 = (unsigned long long)i64;
} else
sscanf(values->value,"%llu",&u64);
typeid = mininttype(u64);
break;
if(negative) {
sscanf(value->value,"%lld",&i64);
u64 = (unsigned long long)i64;
} else
sscanf(value->value,"%llu",&u64);
typeid = mininttype(u64,negative);
break;
case NCJ_DOUBLE:
typeid = NC_DOUBLE;
break;
typeid = NC_DOUBLE;
break;
case NCJ_BOOLEAN:
typeid = NC_UBYTE;
break;
typeid = NC_UBYTE;
break;
case NCJ_STRING: /* requires special handling as an array of characters */
typeid = NC_CHAR;
break;
@ -1002,10 +989,10 @@ inferattrtype(NCjson* values, nc_type* typeidp)
}
static int
mininttype(unsigned long long u64)
mininttype(unsigned long long u64, int negative)
{
long long i64 = (long long)u64; /* keep bit pattern */
if(u64 >= NC_MAX_INT64) return NC_UINT64;
if(!negative && u64 >= NC_MAX_INT64) return NC_UINT64;
if(i64 < 0) {
if(i64 >= NC_MIN_BYTE) return NC_BYTE;
if(i64 >= NC_MIN_SHORT) return NC_SHORT;
@ -1175,7 +1162,7 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container)
if(jattrs != NULL) {
/* Iterate over the attributes to create the in-memory attributes */
/* Watch for reading _FillValue and possibly _ARRAY_DIMENSIONS (xarray) */
/* Watch for special cases: _FillValue and _ARRAY_DIMENSIONS (xarray) */
for(i=0;i<nclistlength(jattrs->contents);i+=2) {
NCjson* key = nclistget(jattrs->contents,i);
NCjson* value = nclistget(jattrs->contents,i+1);
@ -2032,3 +2019,43 @@ done:
NCJreclaim(jatts);
return THROW(stat);
}
#if 0
Not currently used
Special compatibility case:
if the value of the attribute is a dictionary,
or an array with non-atomic values, then
then stringify it and pretend it is of char type.
/* Return 1 if this json is not an
atomic value or an array of atomic values.
That is, it does not look like valid
attribute data.
*/
static int
iscomplexjson(NCjson* j)
{
int i;
switch(j->sort) {
case NCJ_ARRAY:
/* verify that the elements of the array are not complex */
for(i=0;i<nclistlength(j->contents);i++) {
switch (((NCjson*)nclistget(j->contents,i))->sort) {
case NCJ_DICT:
case NCJ_ARRAY:
case NCJ_UNDEF:
case NCJ_NULL:
return 1;
default: break;
}
}
return 0;
case NCJ_DICT:
case NCJ_UNDEF:
case NCJ_NULL:
break;
default:
return 0;
}
return 1;
}
#endif

View File

@ -117,7 +117,7 @@ ref_avail1.cdl ref_avail1.dmp ref_avail1.txt \
ref_xarray.cdl ref_purezarr.cdl ref_purezarr_base.cdl ref_nczarr2zarr.cdl
# Interoperability files
EXTRA_DIST += power_901_constants.zip ref_power_901_constants.cdl
EXTRA_DIST += ref_power_901_constants.zip ref_power_901_constants.cdl ref_quotes.zip ref_quotes.cdl
CLEANFILES = ut_*.txt ut*.cdl tmp*.nc tmp*.cdl tmp*.txt tmp*.dmp tmp*.zip tmp*.nc

Binary file not shown.

View File

@ -1,4 +1,4 @@
netcdf power_901_constants {
netcdf ref_power_901_constants {
dimensions:
lat = 361 ;
lon = 576 ;

Binary file not shown.

View File

@ -0,0 +1,19 @@
netcdf ref_quotes {
dimensions:
time = 10 ;
lat = 20 ;
lon = 30 ;
variables:
float fractional_snow_cover(time, lat, lon) ;
fractional_snow_cover:ID = 68b ;
fractional_snow_cover:esa_cci_path = NaN ;
fractional_snow_cover:long_name = "Surface Fraction Covered by Snow" ;
fractional_snow_cover:orig_attrs = "{\'comment\': \'Grid cell fractional snow cover based on the Globsnow CCI product.\', \'long_name\': \'Surface fraction covered by snow.\', \'project_name\': \'GlobSnow\', \'references\': \'Luojus, Kari, et al. \"ESA DUE Globsnow-Global Snow Database for Climate Research.\" ESA Special Publication. Vol. 686. 2010.\', \'source_name\': \'MFSC\', \'standard_name\': \'surface_snow_area_fraction\', \'units\': \'percent\', \'url\': \'http://www.globsnow.info/\'}" ;
fractional_snow_cover:orig_version = "v2.0" ;
fractional_snow_cover:project_name = "GlobSnow" ;
fractional_snow_cover:time_coverage_end = "2013-01-05" ;
fractional_snow_cover:time_coverage_resolution = "P8D" ;
fractional_snow_cover:time_coverage_start = "2003-01-05" ;
fractional_snow_cover:units = "percent" ;
fractional_snow_cover:url = "http://www.globsnow.info/" ;
}

BIN
nczarr_test/ref_quotes.zip Normal file

Binary file not shown.

View File

@ -19,7 +19,7 @@ testcasefile() {
fileargs ${execdir}/$ref "mode=$mode,$zext"
rm -f tmp_${ref}_${zext}.cdl
${NCDUMP} $flags $fileurl > tmp_${ref}_${zext}.cdl
diff -b ${srcdir}/ref_${ref}.cdl tmp_${ref}_${zext}.cdl
diff -b ${srcdir}/${ref}.cdl tmp_${ref}_${zext}.cdl
}
testcasezip() {
@ -30,7 +30,7 @@ testcasezip() {
fileargs ${execdir}/$ref "mode=$mode,$zext"
rm -f tmp_${ref}_${zext}.cdl
${NCDUMP} $flags $fileurl > tmp_${ref}_${zext}.cdl
diff -b ${srcdir}/ref_${ref}.cdl tmp_${ref}_${zext}.cdl
diff -b ${srcdir}/${ref}.cdl tmp_${ref}_${zext}.cdl
}
testallcases() {
@ -38,17 +38,20 @@ zext=$1
case "$zext" in
file)
# need to unpack
rm -fr power_901_constants power_901_constants.file
unzip ${srcdir}/power_901_constants.zip > /dev/null
mv power_901_constants power_901_constants.file
testcasefile power_901_constants zarr metaonly; # test xarray as default
rm -fr ref_power_901_constants ref_power_901_constants.file
unzip ${srcdir}/ref_power_901_constants.zip > /dev/null
mv ref_power_901_constants ref_power_901_constants.file
testcasefile ref_power_901_constants zarr metaonly; # test xarray as default
;;
zip)
# Move into position
if test "x$srcdir" != "x$execdir" ; then
cp ${srcdir}/power_901_constants.zip ${execdir}
cp ${srcdir}/ref_power_901_constants.zip ${execdir}
cp ${srcdir}/ref_quotes.zip ${execdir}
fi
testcasezip power_901_constants xarray metaonly
testcasezip ref_power_901_constants xarray metaonly
# Test large constant interoperability
testcasezip ref_quotes zarr metaonly
;;
*) echo "unimplemented kind: $1" ; exit 1;;
esac

View File

@ -36,7 +36,8 @@ typedef enum OBJKIND {
OK_NONE=0,
OK_META=1,
OK_CHUNK=2,
OK_GROUP=3
OK_GROUP=3,
OK_IGNORE=4
} OBJKIND;
static struct Mops {
@ -75,6 +76,8 @@ struct Dumpptions {
NCZM_IMPL impl;
char* rootpath;
const struct Type* nctype;
int xflags;
# define XNOZMETADATA 1
} dumpoptions;
/* Forward */
@ -129,10 +132,11 @@ main(int argc, char** argv)
{
int stat = NC_NOERR;
int c;
char* p;
memset((void*)&dumpoptions,0,sizeof(dumpoptions));
while ((c = getopt(argc, argv, "dvx:t:T:")) != EOF) {
while ((c = getopt(argc, argv, "dvx:t:T:X:")) != EOF) {
switch(c) {
case 'd':
dumpoptions.debug = 1;
@ -151,6 +155,14 @@ main(int argc, char** argv)
case 'T':
nctracelevel(atoi(optarg));
break;
case 'X':
for(p=optarg;*p;p++) {
switch (*p) {
case 'm': dumpoptions.xflags |= XNOZMETADATA; break;
default: fprintf(stderr,"Unknown -X argument: %c",*p); break;
}
};
break;
case '?':
fprintf(stderr,"unknown option\n");
goto fail;
@ -323,7 +335,9 @@ objdump(void)
printf("[%d] %s : (%llu)",depth,obj,len);
if(kind == OK_CHUNK) printf(" (%s)",dumpoptions.nctype->typename);
printf(" |");
printcontent(len,content,kind);
if(kind != OK_IGNORE) {
printcontent(len,content,kind);
}
printf("|\n");
} else {
printf("[%d] %s : (%llu) ||\n",depth,obj,len);
@ -445,9 +459,12 @@ keykind(const char* key)
if(suffix) {
if(suffix[0] != '/')
kind = OK_NONE;
else if(suffix[1] == '.')
kind = OK_META;
else if(suffix[strlen(suffix)-1] == '/')
else if(suffix[1] == '.') {
if(strcmp(&suffix[1],".zmetadata")==0 && (dumpoptions.xflags & XNOZMETADATA))
kind = OK_IGNORE;
else
kind = OK_META;
} else if(suffix[strlen(suffix)-1] == '/')
kind = OK_GROUP;
else {
char* p = suffix+1;