Merge pull request #478 from wkliao/cdf5_var_len

CDF-5 fix: let NC_var.len be the true size of variable
This commit is contained in:
Ward Fisher 2018-02-04 13:25:41 -07:00 committed by GitHub
commit b4a29470c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 243 additions and 72 deletions

View File

@ -438,22 +438,6 @@ fi
AM_CONDITIONAL(USE_STRICT_NULL_BYTE_HEADER_PADDING, [test x$enable_strict_null_byte_header_padding = xyes ])
# Check whether we want to enable CDF5 support.
AC_MSG_CHECKING([whether CDF5 support should be enabled (default off)])
AC_ARG_ENABLE([cdf5],
[AS_HELP_STRING([--enable-cdf5],
[build with CDF5 support.])])
test "x$enable_cdf5" = xyes || enable_cdf5=no
AC_MSG_RESULT($enable_cdf5)
if test "x$enable_cdf5" = xyes; then
AC_DEFINE([USE_CDF5], [1], [if true, enable CDF5 Support])
AC_DEFINE([ENABLE_CDF5], [1], [if true, enable CDF5 Support])
fi
AM_CONDITIONAL(USE_CDF5, [test x$enable_cdf5 = xyes ])
AM_CONDITIONAL(ENABLE_CDF5, [test x$enable_cdf5 = xyes ])
# Does the user want to use the ffio module?
AC_MSG_CHECKING([whether FFIO will be used])
AC_ARG_ENABLE([ffio],
@ -882,8 +866,32 @@ $SLEEPCMD
AC_CHECK_SIZEOF(size_t)
$SLEEPCMD
AC_CHECK_SIZEOF(unsigned long long)
$SLEEPCMD
AC_CHECK_SIZEOF(unsigned long long)
# Check whether we want to enable CDF5 support.
AC_MSG_CHECKING([whether CDF5 support should be enabled])
AC_ARG_ENABLE([cdf5],
[AS_HELP_STRING([--disable-cdf5],
[build without CDF5 support.])],
[enable_cdf5=${enableval}], [enable_cdf5=auto]
)
if test "x${enable_cdf5}" = xyes && test "$ac_cv_sizeof_size_t" -lt "8" ; then
dnl unable to support CDF5, but --enable-cdf5 is explicitly set
AC_MSG_ERROR([Unable to support CDF5 feature because size_t is less than 4 bytes])
fi
if test "$ac_cv_sizeof_size_t" -lt "8" ; then
enable_cdf5=no
else
enable_cdf5=yes
fi
AC_MSG_RESULT($enable_cdf5)
if test "x${enable_cdf5}" = xyes; then
AC_DEFINE([USE_CDF5], [1], [if true, enable CDF5 Support])
AC_DEFINE([ENABLE_CDF5], [1], [if true, enable CDF5 Support])
fi
AM_CONDITIONAL(USE_CDF5, [test x$enable_cdf5 = xyes ])
AM_CONDITIONAL(ENABLE_CDF5, [test x$enable_cdf5 = xyes ])
$SLEEPCMD
if test "$ac_cv_type_uchar" = yes ; then
@ -1371,7 +1379,7 @@ AC_SUBST(HAS_DISKLESS,[$enable_diskless])
AC_SUBST(HAS_MMAP,[$enable_mmap])
AC_SUBST(HAS_JNA,[$enable_jna])
AC_SUBST(RELAX_COORD_BOUND,[$enable_relax_coord_bound])
AC_SUBST(ENABLE_ERANGE_FILL,[$enable_erange_fill])
AC_SUBST(HAS_ERANGE_FILL,[$enable_erange_fill])
# Include some specifics for netcdf on windows.
#AH_VERBATIM([_WIN32_STRICMP],

View File

@ -428,6 +428,9 @@ nc_put_rec(int ncid, size_t recnum, void *const *datap);
extern int
NC_check_vlens(NC3_INFO *ncp);
extern int
NC_check_voffs(NC3_INFO *ncp);
/* Define accessors for the dispatchdata */
#define NC3_DATA(nc) ((NC3_INFO*)(nc)->dispatchdata)
#define NC3_DATA_SET(nc,data) ((nc)->dispatchdata = (void*)(data))

View File

@ -312,21 +312,23 @@ fprintf(stderr, " REC %d %s: %ld\n", ii, (*vpp)->name->cp, (long)index);
return NC_EVARSIZE;
}
#endif
if((*vpp)->len != UINT32_MAX) /* flag for vars >= 2**32 bytes */
ncp->recsize += (*vpp)->len;
ncp->recsize += (*vpp)->len;
last = (*vpp);
}
/*
* for special case of
*/
if(last != NULL) {
if(ncp->recsize == last->len) { /* exactly one record variable, pack value */
ncp->recsize = *last->dsizes * last->xsz;
} else if(last->len == UINT32_MAX) { /* huge last record variable */
ncp->recsize += *last->dsizes * last->xsz;
}
}
/*
* for special case (Check CDF-1 and CDF-2 file format specifications.)
* "A special case: Where there is exactly one record variable, we drop the
* requirement that each record be four-byte aligned, so in this case there
* is no record padding."
*/
if (last != NULL) {
if (ncp->recsize == last->len) {
/* exactly one record variable, pack value */
ncp->recsize = *last->dsizes * last->xsz;
}
}
if(NC_IsNew(ncp))
NC_set_numrecs(ncp, 0);
return NC_NOERR;
@ -709,17 +711,14 @@ NC_check_vlens(NC3_INFO *ncp)
if(ncp->vars.nelems == 0)
return NC_NOERR;
if (fIsSet(ncp->flags,NC_64BIT_DATA)) {
/* CDF5 format allows many large vars */
return NC_NOERR;
}
if (fIsSet(ncp->flags,NC_64BIT_OFFSET) && sizeof(off_t) > 4) {
if (fIsSet(ncp->flags,NC_64BIT_DATA)) /* CDF-5 */
vlen_max = X_INT64_MAX - 3; /* "- 3" handles rounded-up size */
else if (fIsSet(ncp->flags,NC_64BIT_OFFSET) && sizeof(off_t) > 4)
/* CDF2 format and LFS */
vlen_max = X_UINT_MAX - 3; /* "- 3" handles rounded-up size */
} else {
/* CDF1 format */
else /* CDF1 format */
vlen_max = X_INT_MAX - 3;
}
/* Loop through vars, first pass is for non-record variables. */
large_vars_count = 0;
rec_vars_count = 0;
@ -728,6 +727,8 @@ NC_check_vlens(NC3_INFO *ncp)
if( !IS_RECVAR(*vpp) ) {
last = 0;
if( NC_check_vlen(*vpp, vlen_max) == 0 ) {
if (fIsSet(ncp->flags,NC_64BIT_DATA)) /* too big for CDF-5 */
return NC_EVARSIZE;
large_vars_count++;
last = 1;
}
@ -756,6 +757,8 @@ NC_check_vlens(NC3_INFO *ncp)
if( IS_RECVAR(*vpp) ) {
last = 0;
if( NC_check_vlen(*vpp, vlen_max) == 0 ) {
if (fIsSet(ncp->flags,NC_64BIT_DATA)) /* too big for CDF-5 */
return NC_EVARSIZE;
large_vars_count++;
last = 1;
}
@ -774,6 +777,59 @@ NC_check_vlens(NC3_INFO *ncp)
return NC_NOERR;
}
/*----< NC_check_voffs() >---------------------------------------------------*/
/*
* Given a valid ncp, check whether the file starting offsets (begin) of all
* variables follows the same increasing order as they were defined.
*/
int
NC_check_voffs(NC3_INFO *ncp)
{
size_t i;
off_t prev_off;
NC_var *varp;
if (ncp->vars.nelems == 0) return NC_NOERR;
/* Loop through vars, first pass is for non-record variables */
prev_off = ncp->begin_var;
for (i=0; i<ncp->vars.nelems; i++) {
varp = ncp->vars.value[i];
if (IS_RECVAR(varp)) continue;
if (varp->begin < prev_off) {
#if 0
fprintf(stderr,"Variable \"%s\" begin offset (%lld) is less than previous variable end offset (%lld)\n", varp->name->cp, varp->begin, prev_off);
#endif
return NC_ENOTNC;
}
prev_off = varp->begin + varp->len;
}
if (ncp->begin_rec < prev_off) {
#if 0
fprintf(stderr,"Record variable section begin offset (%lld) is less than fix-sized variable section end offset (%lld)\n", varp->begin, prev_off);
#endif
return NC_ENOTNC;
}
/* Loop through vars, second pass is for record variables */
prev_off = ncp->begin_rec;
for (i=0; i<ncp->vars.nelems; i++) {
varp = ncp->vars.value[i];
if (!IS_RECVAR(varp)) continue;
if (varp->begin < prev_off) {
#if 0
fprintf(stderr,"Variable \"%s\" begin offset (%lld) is less than previous variable end offset (%lld)\n", varp->name->cp, varp->begin, prev_off);
#endif
return NC_ENOTNC;
}
prev_off = varp->begin + varp->len;
}
return NC_NOERR;
}
/*
* End define mode.
@ -794,6 +850,9 @@ NC_endef(NC3_INFO *ncp,
if(status != NC_NOERR)
return status;
status = NC_begins(ncp, h_minfree, v_align, v_minfree, r_align);
if(status != NC_NOERR)
return status;
status = NC_check_voffs(ncp);
if(status != NC_NOERR)
return status;

View File

@ -939,6 +939,7 @@ static int
v1h_put_NC_var(v1hs *psp, const NC_var *varp)
{
int status;
size_t vsize;
status = v1h_put_NC_string(psp, varp->name);
if(status != NC_NOERR)
@ -975,9 +976,19 @@ v1h_put_NC_var(v1hs *psp, const NC_var *varp)
if(status != NC_NOERR)
return status;
status = v1h_put_size_t(psp, &varp->len);
if(status != NC_NOERR)
return status;
/* write vsize to header.
* CDF format specification: The vsize field is actually redundant, because
* its value may be computed from other information in the header. The
* 32-bit vsize field is not large enough to contain the size of variables
* that require more than 2^32 - 4 bytes, so 2^32 - 1 is used in the vsize
* field for such variables.
*/
vsize = varp->len;
if (varp->len > 4294967292UL && (psp->version == NC_FORMAT_CLASSIC ||
psp->version == NC_FORMAT_64BIT_OFFSET))
vsize = 4294967295UL; /* 2^32-1 */
status = v1h_put_size_t(psp, &vsize);
if(status != NC_NOERR) return status;
status = check_v1hs(psp, psp->version == 1 ? 4 : 8); /*begin*/
if(status != NC_NOERR)
@ -1231,11 +1242,6 @@ NC_computeshapes(NC3_INFO* ncp)
{
if(first_rec == NULL)
first_rec = *vpp;
if((*vpp)->len == UINT32_MAX &&
(fIsSet(ncp->flags, NC_64BIT_OFFSET) ||
fIsSet(ncp->flags, NC_64BIT_DATA))) /* Flag for large last record */
ncp->recsize += (*vpp)->dsizes[0] * (*vpp)->xsz;
else
ncp->recsize += (*vpp)->len;
}
else
@ -1538,6 +1544,10 @@ nc_get_NC(NC3_INFO* ncp)
if(status != NC_NOERR)
goto unwind_get;
status = NC_check_voffs(ncp);
if(status != NC_NOERR)
goto unwind_get;
unwind_get:
(void) rel_v1hs(&gs);
return status;

View File

@ -485,29 +485,21 @@ NC_var_shape(NC_var *varp, const NC_dimarray *dims)
out :
if( varp->xsz <= (X_UINT_MAX - 1) / product ) /* if integer multiply will not overflow */
{
varp->len = product * varp->xsz;
switch(varp->type) {
case NC_BYTE :
case NC_CHAR :
case NC_UBYTE :
case NC_SHORT :
case NC_USHORT :
if( varp->len%4 != 0 )
{
varp->len += 4 - varp->len%4; /* round up */
/* *dsp += 4 - *dsp%4; */
}
break;
default:
/* already aligned */
break;
}
} else
{ /* OK for last var to be "too big", indicated by this special len */
varp->len = X_UINT_MAX;
}
/* No variable size can be > X_INT64_MAX - 3 */
if (0 == NC_check_vlen(varp, X_INT64_MAX-3)) return NC_EVARSIZE;
/*
* For CDF-1 and CDF-2 formats, the total number of array elements
* cannot exceed 2^32, unless this variable is the last fixed-size
* variable, there is no record variable, and the file starting
* offset of this variable is less than 2GiB.
* This will be checked in NC_check_vlens() during NC_endef()
*/
varp->len = product * varp->xsz;
if (varp->len % 4 > 0)
varp->len += 4 - varp->len % 4; /* round up */
#if 0
arrayp("\tshape", varp->ndims, varp->shape);
arrayp("\tdsizes", varp->ndims, varp->dsizes);

View File

@ -12,7 +12,6 @@ AM_CPPFLAGS += -DTOPBINDIR=${abs_top_bindir}
LDADD = ${top_builddir}/liblib/libnetcdf.la
AM_CPPFLAGS += -I$(top_builddir)/liblib -I$(top_builddir)/include -I$(top_srcdir)/libsrc
# Note which tests depend on other tests. necessary for make -j check
TEST_EXTENSIONS = .sh
@ -46,7 +45,7 @@ test_write.c util.c error.h tests.h
# If the user asked for large file tests, then add them.
if LARGE_FILE_TESTS
TESTPROGRAMS += quick_large_files tst_big_var6 tst_big_var2 \
tst_big_rvar tst_big_var tst_large large_files tst_large_cdf5
tst_big_rvar tst_big_var tst_large large_files
endif # LARGE_FILE_TESTS
if BUILD_BENCHMARKS
@ -111,6 +110,19 @@ tst_formatx.nc nc_test_cdf5.nc unlim.nc tst_inq_type.nc \
tst_elatefill.nc tst_global_fillval.nc tst_large_cdf5.nc \
tst_max_var_dims.nc benchmark.nc tst_def_var_fill.nc
EXTRA_DIST += bad_cdf5_begin.nc run_cdf5.sh
if ENABLE_CDF5
# bad_cdf5_begin.nc is a corrupted CDF-5 file with bad variable starting
# file offsets. It is to be used by tst_open_cdf5.c to check if it can
# detect and report error code NC_ENOTNC.
TESTS += run_cdf5.sh
check_PROGRAMS += tst_open_cdf5
if LARGE_FILE_TESTS
TESTPROGRAMS += tst_large_cdf5 tst_cdf5_begin
CLEANFILES += tst_large_cdf5.nc tst_cdf5_begin.nc
endif
endif
# Only clean these on maintainer-clean, because they require m4 to
# regenerate.
#MAINTAINERCLEANFILES = test_get.c test_put.c

BIN
nc_test/bad_cdf5_begin.nc Normal file

Binary file not shown.

6
nc_test/run_cdf5.sh Executable file
View File

@ -0,0 +1,6 @@
#!/bin/sh
set -e
./tst_open_cdf5 ${srcdir}/bad_cdf5_begin.nc

56
nc_test/tst_cdf5_begin.c Normal file
View File

@ -0,0 +1,56 @@
#include <stdio.h>
#include <stdlib.h>
#include <netcdf.h>
/* When using NetCDF 4.4.1 ad prior to create a CDF-5 file and defining a small
* variable after a big variable (> 2^32-3 bytes), the file starting offset of
* the small variable (and all variables defined after the big variable) is
* calculated incorrectly. This test program detects this bug by checking the
* contents of the possible overlaps between the two variables.
*/
#define ERR {if(err!=NC_NOERR){printf("Error at line %d in %s: %s\n", __LINE__,__FILE__, nc_strerror(err));nerrs++;}}
#define FILE_NAME "tst_cdf5_begin.nc"
int main(int argc, char *argv[])
{
int i, err, nerrs=0, ncid, dimid[2], varid[2];
short buf[10];
size_t start, count;
err = nc_create(FILE_NAME, NC_CLOBBER|NC_64BIT_DATA, &ncid); ERR;
err = nc_def_dim(ncid, "dim0", NC_MAX_UINT, &dimid[0]); ERR
err = nc_def_dim(ncid, "dim1", 10, &dimid[1]); ERR
/* define one small variable after one big variable */
err = nc_def_var(ncid, "var_big", NC_SHORT, 1, &dimid[0], &varid[0]); ERR
err = nc_def_var(ncid, "var_small", NC_SHORT, 1, &dimid[1], &varid[1]); ERR
err = nc_set_fill(ncid, NC_NOFILL, NULL); ERR
err = nc_enddef(ncid); ERR
/* write to var_big in location overlapping with var_small when using
* netCDF 4.4.x or prior */
start = NC_MAX_UINT/sizeof(short);
count = 10;
for (i=0; i<10; i++) buf[i] = i;
err = nc_put_vara_short(ncid, varid[0], &start, &count, buf); ERR
/* write var_small */
for (i=0; i<10; i++) buf[i] = -1;
err = nc_put_var_short(ncid, varid[1], buf); ERR
/* read back var_big and check contents */
for (i=0; i<10; i++) buf[i] = -1;
err = nc_get_vara_short(ncid, varid[0], &start, &count,buf); ERR
for (i=0; i<10; i++) {
if (buf[i] != i) {
printf("Error at buf[%d] expect %d but got %hd\n",i,i,buf[i]);
nerrs++;
}
}
err = nc_close(ncid); ERR
return (nerrs > 0);
}

25
nc_test/tst_open_cdf5.c Normal file
View File

@ -0,0 +1,25 @@
#include <stdio.h>
#include <stdlib.h>
#include <netcdf.h>
#define FILE_NAME "bad_cdf5_begin.nc"
int main(int argc, char *argv[])
{
char *fname=FILE_NAME;
int err, nerrs=0, ncid;
if (argc == 2) fname = argv[1];
err = nc_open(fname, NC_NOWRITE, &ncid);
if (err != NC_ENOTNC) {
printf("Error: nc_open() expect NC_ENOTNC but got (%s)\n",
nc_strerror(err));
nerrs++;
}
else if (err == NC_NOERR) /* close file */
nc_close(ncid);
return (nerrs > 0);
}