Fix error where not converting fill data

re: Github Issue https://github.com/Unidata/netcdf-c/issues/1826

It turns out that the common get code (NC4_get_vars) in libhdf5
(and libnczarr) has an optimization where it does not attempt to
read from the file if the file is all fill values. Rather it
just fills the output buffer with the fill value.  The problem
is that -- in that case -- it forgets that conversion might still be
needed.  So the conversion never occurs and the raw bits of
the fill data are stored directly into the memory space.

Solution: move some code around to properly do the
conversion no matter how the data was obtained.

Added a test cases nc_test4/test_fillonly.sh and
nczarr_test/test_fillonlyz.sh
This commit is contained in:
Dennis Heimbigner 2020-09-12 14:49:59 -06:00
parent b5dd57c3a9
commit 2f0a6d22e9
13 changed files with 337 additions and 104 deletions

View File

@ -1894,6 +1894,38 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
log_dim_info(var, fdims, fmaxdims, start, count);
#endif
/* Check the type_info fields. */
assert(var->type_info && var->type_info->size &&
var->type_info->format_type_info);
/* Later on, we will need to know the size of this type in the
* file. */
file_type_size = var->type_info->size;
/* Are we going to convert any data? (No converting of compound or
* opaque types.) */
if (mem_nc_type != var->type_info->hdr.id &&
mem_nc_type != NC_COMPOUND && mem_nc_type != NC_OPAQUE)
{
/* We must convert - allocate a buffer. */
need_to_convert++;
if (var->ndims)
for (d2 = 0; d2 < var->ndims; d2++)
len *= countp[d2];
LOG((4, "converting data for var %s type=%d len=%d", var->hdr.name,
var->type_info->hdr.id, len));
/* If we're reading, we need bufr to have enough memory to store
* the data in the file. If we're writing, we need bufr to be
* big enough to hold all the data in the file's type. */
if (len > 0)
if (!(bufr = malloc(len * file_type_size)))
BAIL(NC_ENOMEM);
}
else
if (!bufr)
bufr = data;
/* Check dimension bounds. Remember that unlimited dimensions can
* get data beyond the length of the dataset, but within the
* lengths of the unlimited dimension(s). */
@ -1957,14 +1989,6 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
}
}
/* Check the type_info fields. */
assert(var->type_info && var->type_info->size &&
var->type_info->format_type_info);
/* Later on, we will need to know the size of this type in the
* file. */
file_type_size = var->type_info->size;
if (!no_read)
{
/* Now you would think that no one would be crazy enough to write
@ -2007,30 +2031,6 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
bufr = *(char **)data;
}
/* Are we going to convert any data? (No converting of compound or
* opaque types.) */
if (mem_nc_type != var->type_info->hdr.id &&
mem_nc_type != NC_COMPOUND && mem_nc_type != NC_OPAQUE)
{
/* We must convert - allocate a buffer. */
need_to_convert++;
if (var->ndims)
for (d2 = 0; d2 < var->ndims; d2++)
len *= countp[d2];
LOG((4, "converting data for var %s type=%d len=%d", var->hdr.name,
var->type_info->hdr.id, len));
/* If we're reading, we need bufr to have enough memory to store
* the data in the file. If we're writing, we need bufr to be
* big enough to hold all the data in the file's type. */
if (len > 0)
if (!(bufr = malloc(len * file_type_size)))
BAIL(NC_ENOMEM);
}
else
if (!bufr)
bufr = data;
/* Create the data transfer property list. */
if ((xfer_plistid = H5Pcreate(H5P_DATASET_XFER)) < 0)
BAIL(NC_EHDFERR);
@ -2047,23 +2047,6 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
((NC_HDF5_TYPE_INFO_T *)var->type_info->format_type_info)->native_hdf_typeid,
mem_spaceid, file_spaceid, xfer_plistid, bufr) < 0)
BAIL(NC_EHDFERR);
/* Convert data type if needed. */
if (need_to_convert)
{
if ((retval = nc4_convert_type(bufr, data, var->type_info->hdr.id, mem_nc_type,
len, &range_error, var->fill_value,
(h5->cmode & NC_CLASSIC_MODEL))))
BAIL(retval);
/* For strict netcdf-3 rules, ignore erange errors between UBYTE
* and BYTE types. */
if ((h5->cmode & NC_CLASSIC_MODEL) &&
(var->type_info->hdr.id == NC_UBYTE || var->type_info->hdr.id == NC_BYTE) &&
(mem_nc_type == NC_UBYTE || mem_nc_type == NC_BYTE) &&
range_error)
range_error = 0;
}
} /* endif ! no_read */
else
{
@ -2123,7 +2106,7 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
fill_len *= (fill_value_size[d2] ? fill_value_size[d2] : 1);
/* Copy the fill value into the rest of the data buffer. */
filldata = (char *)data + real_data_size;
filldata = (char *)bufr + real_data_size;
for (i = 0; i < fill_len; i++)
{
@ -2149,9 +2132,26 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
else
memcpy(filldata, fillvalue, file_type_size);
filldata = (char *)filldata + file_type_size;
}
}
}
/* Convert data type if needed. */
if (need_to_convert)
{
if ((retval = nc4_convert_type(bufr, data, var->type_info->hdr.id, mem_nc_type,
len, &range_error, var->fill_value,
(h5->cmode & NC_CLASSIC_MODEL))))
BAIL(retval);
/* For strict netcdf-3 rules, ignore erange errors between UBYTE
* and BYTE types. */
if ((h5->cmode & NC_CLASSIC_MODEL) &&
(var->type_info->hdr.id == NC_UBYTE || var->type_info->hdr.id == NC_BYTE) &&
(mem_nc_type == NC_UBYTE || mem_nc_type == NC_BYTE) &&
range_error)
range_error = 0;
}
exit:
if (file_spaceid > 0)
if (H5Sclose(file_spaceid) < 0)

View File

@ -1678,6 +1678,38 @@ NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
log_dim_info(var, fdims, fmaxdims, start, count);
#endif
/* Check the type_info fields. */
assert(var->type_info && var->type_info->size &&
var->type_info->format_type_info);
/* Later on, we will need to know the size of this type in the
* file. */
file_type_size = var->type_info->size;
/* Are we going to convert any data? (No converting of compound or
* opaque types.) */
if (mem_nc_type != var->type_info->hdr.id &&
mem_nc_type != NC_COMPOUND && mem_nc_type != NC_OPAQUE)
{
/* We must convert - allocate a buffer. */
need_to_convert++;
if (var->ndims)
for (d2 = 0; d2 < var->ndims; d2++)
len *= countp[d2];
LOG((4, "converting data for var %s type=%d len=%d", var->hdr.name,
var->type_info->hdr.id, len));
/* If we're reading, we need bufr to have enough memory to store
* the data in the file. If we're writing, we need bufr to be
* big enough to hold all the data in the file's type. */
if (len > 0)
if (!(bufr = malloc(len * file_type_size)))
BAIL(NC_ENOMEM);
}
else
if (!bufr)
bufr = data;
/* Check dimension bounds. Remember that unlimited dimensions can
* put data beyond their current length. */
for (d2 = 0; d2 < var->ndims; d2++)
@ -1739,14 +1771,6 @@ NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
}
}
/* Check the type_info fields. */
assert(var->type_info && var->type_info->size &&
var->type_info->format_type_info);
/* Later on, we will need to know the size of this type in the
* file. */
file_type_size = var->type_info->size;
if (!no_read)
{
#ifdef LOOK
@ -1792,29 +1816,6 @@ NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
bufr = *(char **)data;
}
#endif
/* Are we going to convert any data? (No converting of compound or
* opaque types.) */
if (mem_nc_type != var->type_info->hdr.id &&
mem_nc_type != NC_COMPOUND && mem_nc_type != NC_OPAQUE)
{
/* We must convert - allocate a buffer. */
need_to_convert++;
if (var->ndims)
for (d2 = 0; d2 < var->ndims; d2++)
len *= countp[d2];
LOG((4, "converting data for var %s type=%d len=%d", var->hdr.name,
var->type_info->hdr.id, len));
/* If we're reading, we need bufr to have enough memory to store
* the data in the file. If we're writing, we need bufr to be
* big enough to hold all the data in the file's type. */
if (len > 0)
if (!(bufr = malloc(len * file_type_size)))
BAIL(NC_ENOMEM);
}
else
if (!bufr)
bufr = data;
#ifdef LOOK
/* Create the data transfer property list. */
@ -1831,24 +1832,8 @@ NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
if((retval = NCZ_transferslice(var, READING, start, count, stride, bufr, var->type_info->hdr.id)))
BAIL(retval);
/* Convert data type if needed. */
if (need_to_convert)
{
if ((retval = nc4_convert_type(bufr, data, var->type_info->hdr.id, mem_nc_type,
len, &range_error, var->fill_value,
(h5->cmode & NC_CLASSIC_MODEL))))
BAIL(retval);
/* For strict netcdf-3 rules, ignore erange errors between UBYTE
* and BYTE types. */
if ((h5->cmode & NC_CLASSIC_MODEL) &&
(var->type_info->hdr.id == NC_UBYTE || var->type_info->hdr.id == NC_BYTE) &&
(mem_nc_type == NC_UBYTE || mem_nc_type == NC_BYTE) &&
range_error)
range_error = 0;
}
} /* endif ! no_read */
/* Now we need to fake up any further data that was asked for,
using the fill values instead. First skip past the data we
just read, if any. */
@ -1902,6 +1887,22 @@ NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp,
}
}
/* Convert data type if needed. */
if (need_to_convert)
{
if ((retval = nc4_convert_type(bufr, data, var->type_info->hdr.id, mem_nc_type,
len, &range_error, var->fill_value,
(h5->cmode & NC_CLASSIC_MODEL))))
BAIL(retval);
/* For strict netcdf-3 rules, ignore erange errors between UBYTE
* and BYTE types. */
if ((h5->cmode & NC_CLASSIC_MODEL) &&
(var->type_info->hdr.id == NC_UBYTE || var->type_info->hdr.id == NC_BYTE) &&
(mem_nc_type == NC_UBYTE || mem_nc_type == NC_BYTE) &&
range_error)
range_error = 0;
}
exit:
#ifdef LOOK
if (file_spaceid > 0)

View File

@ -29,6 +29,8 @@ IF(BUILD_UTILITIES)
build_bin_test(renamegroup)
add_sh_test(nc_test4 run_grp_rename)
ADD_SH_TEST(nc_test4 tst_misc)
build_bin_test(tst_fillonly)
ADD_SH_TEST(nc_test4 test_fillonly)
IF(ENABLE_FILTER_TESTING)
build_bin_test(tst_filterparser)
build_bin_test(test_filter)

View File

@ -63,6 +63,9 @@ if BUILD_UTILITIES
check_PROGRAMS += renamegroup
TESTS += run_grp_rename.sh tst_misc.sh
check_PROGRAMS += tst_fillonly
TESTS += test_fillonly.sh
# Szip Tests (requires ncdump)
if USE_SZIP
check_PROGRAMS += test_szip h5testszip
@ -100,7 +103,8 @@ ref_szip.cdl tst_filter.sh bzip2.cdl ref_filtered.cdl \
ref_unfiltered.cdl ref_bzip2.c findplugin.in ref_unfilteredvv.cdl \
ref_filteredvv.cdl ref_multi.cdl \
ref_ncgenF.cdl ref_nccopyF.cdl \
ref_filter_order.txt ref_filter_repeat.txt
ref_filter_order.txt ref_filter_repeat.txt \
ref_fillonly.cdl test_fillonly.sh
CLEANFILES = tst_mpi_parallel.bin cdm_sea_soundings.nc bm_chunking.nc \
tst_floats_1D.cdl floats_1D_3.nc floats_1D.cdl tst_*.nc \
@ -110,7 +114,7 @@ tst_*.h5 tst_grp_rename.cdl tst_grp_rename.dmp ref_grp_rename.cdl \
foo1.nc tst_*.h4 test.nc testszip.nc test.h5 szip_dump.cdl \
perftest.txt bigmeta.nc bigvars.nc *.gz MSGCPP_*.nc \
floats*.nc floats*.cdl shorts*.nc shorts*.cdl ints*.nc ints*.cdl \
testfilter_reg.nc filterorder.txt filterrepeat.txt
testfilter_reg.nc filterorder.txt filterrepeat.txt tmp_fillonly.nc
DISTCLEANFILES = findplugin.sh run_par_test.sh

18
nc_test4/ref_fillonly.cdl Normal file
View File

@ -0,0 +1,18 @@
netcdf ref_fillonly {
dimensions:
x = UNLIMITED ; // (6 currently)
variables:
int i(x) ;
i:_Storage = "chunked" ;
i:_ChunkSizes = 6 ;
float f(x) ;
f:_FillValue = -9999.f ;
f:_Storage = "chunked" ;
f:_ChunkSizes = 6 ;
// global attributes:
:_Format = "netCDF-4" ;
data:
i = 1, 2, 3, 4, 5, 6 ;
}

17
nc_test4/test_fillonly.sh Executable file
View File

@ -0,0 +1,17 @@
#!/bin/sh
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh
# This shell script tests bug reported in github issue
# https://github.com/Unidata/netcdf-c/issues/1826
set -e
echo ""
echo "*** Testing data conversions when a variable has fill value but never written"
${NCGEN} -4 -b -o tmp_fillonly.nc $srcdir/ref_fillonly.cdl
${execdir}/tst_fillonly${ext}
exit 0

74
nc_test4/tst_fillonly.c Executable file
View File

@ -0,0 +1,74 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "netcdf.h"
#undef DEBUG
static void
nccheck(int ret)
{
if(ret == NC_NOERR) return;
fprintf(stderr,"err=%s\n",nc_strerror(ret));
exit(1);
}
#define NCCHECK(err) nccheck(err)
int
main(int argc, char *argv[] )
{
int err, ncid, varid, dimid[1];
size_t dimlen[1];
float *fdat;
int *idat;
const char* filename = "tmp_fillonly.nc";
const char* varname = "f";
const char* dimname = "x";
size_t i;
NCCHECK(err = nc_open(filename,NC_NETCDF4,&ncid));
NCCHECK(err = nc_inq_varid(ncid, varname, &varid));
NCCHECK(err = nc_inq_dimid(ncid, dimname, dimid));
NCCHECK(err = nc_inq_dim(ncid, dimid[0], NULL, dimlen));
/* Make room for both double and floating dat */
fdat = (float *)calloc(1,sizeof(float) * dimlen[0]);
idat = (int *)calloc(1,sizeof(int) * dimlen[0]);
NCCHECK(err = nc_get_var_int(ncid, varid, idat));
NCCHECK(err = nc_get_var_float(ncid, varid, fdat));
#ifdef DEBUG
printf("int[0..%d]:",(int)dimlen[0]);
for(i=0; i<dimlen[0]; i++ ) printf(" %i", idat[i]);
printf("\n");
printf("float[0..%d]:",(int)dimlen[0]);
for(i=0; i<dimlen[0]; i++ ) printf(" %f", fdat[i]);
printf("\n");
#endif
/* Do the comparisons */
for(i=0; i<dimlen[0]; i++ ) {
if(fdat[i] != (float)(idat[i])) {
fprintf(stderr,"data mismatch [%d]: float=%f (float)int=%f int=%i\n",(int)i,fdat[i],(float)idat[i],idat[i]);
return 1;
}
}
return 0;
}

View File

@ -35,6 +35,7 @@ IF(ENABLE_TESTS)
BUILD_BIN_TEST(tst_chunks ${COMMONSRC})
BUILD_BIN_TEST(tst_chunks2 ${COMMONSRC})
BUILD_BIN_TEST(tst_chunks3 ${COMMONSRC})
BUILD_BIN_TEST(tst_fillonlyz)
TARGET_INCLUDE_DIRECTORIES(ut_map PUBLIC ../libnczarr)
TARGET_INCLUDE_DIRECTORIES(ut_mapapi PUBLIC ../libnczarr)
@ -62,6 +63,7 @@ IF(ENABLE_TESTS)
IF(USE_HDF5)
add_sh_test(nczarr_test run_it_test1)
add_sh_test(nczarr_test tst_nccopyz)
add_sh_test(nczarr_test test_fillonlyz)
ENDIF()
add_sh_test(nczarr_test run_it_test2)
ENDIF(BUILD_UTILITIES)

View File

@ -34,7 +34,7 @@ ut_json_SOURCES = ut_json.c ${commonsrc}
ut_projections_SOURCES = ut_projections.c ${commonsrc}
ut_chunking_SOURCES = ut_chunking.c ${commonsrc}
check_PROGRAMS += tst_chunks tst_chunks2 tst_chunks3
check_PROGRAMS += tst_chunks tst_chunks2 tst_chunks3 tst_fillonlyz
TESTS += run_ut_map.sh
TESTS += run_ut_mapapi.sh
@ -45,6 +45,7 @@ if BUILD_UTILITIES
if USE_HDF5
TESTS += tst_nccopyz.sh
TESTS += run_it_test1.sh
TESTS += test_fillonlyz.sh
endif
TESTS += run_it_test2.sh
endif
@ -98,7 +99,8 @@ ref_ut_mapapi_meta_s3.cdl \
ref_ut_mapapi_search_nz4.txt \
ref_ut_mapapi_search_nzf.txt \
ref_ut_mapapi_search_s3.txt \
ref_perdimspecs.cdl
ref_perdimspecs.cdl \
ref_fillonly.cdl test_fillonlyz.sh
CLEANFILES = test.nzf test.nz4 test.nz4 tst_chunks.nc.nzf tst_chunks.nc.nz4 \
testmap.nzf testmap.nz4 testmapapi.nzf testmapapi.nz4 \

View File

@ -0,0 +1,20 @@
netcdf ref_fillonly {
dimensions:
// When UNLIMITED is implemented, then use this value for x
// x = UNLIMITED ; // (6 currently)
x = 6;
variables:
int i(x) ;
i:_Storage = "chunked" ;
i:_ChunkSizes = 6 ;
float f(x) ;
f:_FillValue = -9999.f ;
f:_Storage = "chunked" ;
f:_ChunkSizes = 6 ;
// global attributes:
:_Format = "netCDF-4" ;
data:
i = 1, 2, 3, 4, 5, 6 ;
}

19
nczarr_test/test_fillonlyz.sh Executable file
View File

@ -0,0 +1,19 @@
#!/bin/sh
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh
# This shell script tests bug reported in github issue
# https://github.com/Unidata/netcdf-c/issues/1826
set -e
echo ""
echo "*** Testing data conversions when a variable has fill value but never written"
${NCGEN} -4 -b -o 'file://tmp_fillonly.nc#mode=nczarr,nzf' $srcdir/ref_fillonly.cdl
${execdir}/tst_fillonlyz${ext}
rm -fr ./tmp_fillonly.nc
exit 0

View File

@ -0,0 +1,74 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "netcdf.h"
#undef DEBUG
static void
nccheck(int ret)
{
if(ret == NC_NOERR) return;
fprintf(stderr,"err=%s\n",nc_strerror(ret));
exit(1);
}
#define NCCHECK(err) nccheck(err)
int
main(int argc, char *argv[] )
{
int err, ncid, varid, dimid[1];
size_t dimlen[1];
float *fdat;
int *idat;
const char* filename = "file://tmp_fillonly.nc#mode=nczarr,nzf";
const char* varname = "f";
const char* dimname = "x";
size_t i;
NCCHECK(err = nc_open(filename,NC_NETCDF4,&ncid));
NCCHECK(err = nc_inq_varid(ncid, varname, &varid));
NCCHECK(err = nc_inq_dimid(ncid, dimname, dimid));
NCCHECK(err = nc_inq_dim(ncid, dimid[0], NULL, dimlen));
/* Make room for both double and floating dat */
fdat = (float *)calloc(1,sizeof(float) * dimlen[0]);
idat = (int *)calloc(1,sizeof(int) * dimlen[0]);
NCCHECK(err = nc_get_var_int(ncid, varid, idat));
NCCHECK(err = nc_get_var_float(ncid, varid, fdat));
#ifdef DEBUG
printf("int[0..%d]:",(int)dimlen[0]);
for(i=0; i<dimlen[0]; i++ ) printf(" %i", idat[i]);
printf("\n");
printf("float[0..%d]:",(int)dimlen[0]);
for(i=0; i<dimlen[0]; i++ ) printf(" %f", fdat[i]);
printf("\n");
#endif
/* Do the comparisons */
for(i=0; i<dimlen[0]; i++ ) {
if(fdat[i] != (float)(idat[i])) {
fprintf(stderr,"data mismatch [%d]: float=%f (float)int=%f int=%i\n",(int)i,fdat[i],(float)idat[i],idat[i]);
return 1;
}
}
return 0;
}

View File

@ -60,7 +60,7 @@ CHUNKSIZES=`cat tmppds.cdl | sed -e '/tas:_ChunkSizes/p' -ed | tr -d '\t \r'`
test "x$CHUNKSIZES" = 'xtas:_ChunkSizes=10,15,20;'
# Cleanup
rm -f tmp*.nc tmp*.cdl
rm -fr tmp*.nc tmp*.cdl
rm -fr tmp*.nzf
echo "*** All nccopy nczarr tests passed!"