Allow use of type keywords as identifiers in formats that do not support that type.

Built-in type-name keywords are currently flagged when used as
identifiers in formats that do not support that type.  So if a
user declares a dimension named "string" in a classic .cdl file,
it causes an error.

This PR modifies ncgen to allow those format-specific type keywords
to be used as identifiers when compiling to formats that do not
support that type. Also added a test for this.

Also includes a couple of miscellaneous changes that make some debug output conditional.
This commit is contained in:
Dennis Heimbigner 2020-06-05 17:03:29 -06:00
parent b36c9c5b3f
commit 90b912b7e8
11 changed files with 830 additions and 429 deletions

View File

@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release
## 4.8.0 - TBD
* [Enhancement] Modify ncgen to allow type keywords not used in some particular format to be used as identifiers when compiling to that format. For example, when compiling a netcdf-3 .cdl file, allow the use of "string" as a dimension name [https://github.com/Unidata/netcdf-c/issues/1750].
* [Bug Fix] Fix some protocol bugs/differences between the netcdf-c library and the OPeNDAP Hyrax server. Also cleanup checksum handling [https://github.com/Unidata/netcdf-c/issues/1712].
* [Bug Fix] Add necessary __declspec declarations to allow compilation
of netcdf library without causing errors or (_declspec related)

View File

@ -268,6 +268,10 @@ ENDIF(MSVC)
add_sh_test(ncdump tst_ctests)
# Test for keywords as identifiers. The script includes a cdf5 (ncgen -5)
# case, so it is only registered when CDF5 support is available.
IF(USE_CDF5)
add_sh_test(ncdump test_keywords)
ENDIF()
ENDIF()
ENDIF()

View File

@ -4,7 +4,6 @@
# Ed Hartnett, Dennis Heimbigner, Ward Fisher
#SH_LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver-verbose
#sh_LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver-verbose
#LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver-verbose
@ -112,6 +111,11 @@ if USE_HDF5
TESTS += tst_ctests.sh
endif
if ENABLE_CDF5
# Test for keywords as identifiers. The script includes a cdf5 (ncgen -5)
# case, so it is only run when CDF5 support is enabled.
TESTS += test_keywords.sh
endif
endif BUILD_TESTSETS
# These files all have to be included with the distribution.
@ -150,8 +154,8 @@ tst_ncgen4_cycle.sh tst_null_byte_padding.sh \
ref_null_byte_padding_test.nc ref_tst_irish_rover.nc ref_provenance_v1.nc \
ref_tst_radix.cdl tst_radix.cdl test_radix.sh \
ref_nccopy_w.cdl tst_nccopy_w3.sh tst_nccopy_w4.sh ref_no_ncproperty.nc \
test_unicode_directory.sh
test_unicode_directory.sh \
test_keywords.sh ref_keyword1.cdl ref_keyword2.cdl
# The L512.bin file is file containing exactly 512 bytes each of value 0.
# It is used for creating hdf5 files with varying offsets for testing.
@ -186,4 +190,5 @@ tst_compound_datasize_test.cdl tst_compound_datasize_test2.cdl \
tst_ncf199.cdl tst_tst_gattenum.cdl tst_tst_usuffix.cdl ctest.c \
ctest64.c nccopy3_subset_out.nc camrun.c tst_ncf213.cdl tst_ncf213.nc \
tst_radix.nc tmp_radix.cdl ctest_small_3.c ctest_small_4.c \
ctest_special_atts_4.c
ctest_special_atts_4.c \
keyword1.nc keyword2.nc tmp_keyword1.cdl tmp_keyword2.cdl

View File

@ -32,6 +32,7 @@
#include "list.h"
#undef DEBUGFILTER
#undef DEBUGCHUNK
/* default bytes of memory we are willing to allocate for variable
* values during copy */
@ -1462,6 +1463,7 @@ copy_vars(int igrp, int ogrp)
return stat;
}
#ifdef DEBUGCHUNK
static void
report(int rank, size_t* start, size_t* count, void* buf)
{
@ -1482,7 +1484,7 @@ report(int rank, size_t* start, size_t* count, void* buf)
fprintf(stderr,"\n");
fflush(stderr);
}
#endif /*DEBUGCHUNK*/
/* Copy the schema in a group and all its subgroups, recursively, from
* group igrp in input to parent group ogrp in destination. Use
@ -1636,7 +1638,9 @@ copy_var_data(int igrp, int varid, int ogrp) {
* subsequent calls. */
while((ntoget = nc_next_iter(iterp, start, count)) > 0) {
NC_CHECK(nc_get_vara(igrp, varid, start, count, buf));
report(iterp->rank,start,count,buf);
#ifdef DEBUGCHUNK
report(iterp->rank,start,count,buf);
#endif
NC_CHECK(nc_put_vara(ogrp, ovarid, start, count, buf));
#ifdef USE_NETCDF4
/* we have to explicitly free values for strings and vlens */

8
ncdump/ref_keyword1.cdl Normal file
View File

@ -0,0 +1,8 @@
netcdf keyword1 {
dimensions:
string = 128;
int64 = 64;
variables:
int string(string);
int int64(int64);
}

6
ncdump/ref_keyword2.cdl Normal file
View File

@ -0,0 +1,6 @@
netcdf keyword2 {
dimensions:
string = 128;
variables:
int string(string);
}

23
ncdump/test_keywords.sh Executable file
View File

@ -0,0 +1,23 @@
#!/bin/sh
# Check that ncgen accepts type keywords as identifiers when the output
# format does not define that type. Each case is a round trip:
#   ref_keywordN.cdl --ncgen--> keywordN.nc --ncdump -h--> tmp_keywordN.cdl
# and the dumped header must match the reference CDL (ignoring whitespace).

if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh

# Abort on the first failing command so any ncgen/ncdump/diff failure
# fails the whole test.
set -e

echo "*** Test use of keywords for formats where the keyword is not defined"

# Case 1: classic format (-3); ref_keyword1.cdl uses "string" and "int64"
# as dimension and variable names.
# $srcdir is quoted so a source path containing whitespace is not split.
echo "*** classic: creating keyword1.nc from ref_keyword1.cdl..."
${NCGEN} -3 -lb -o keyword1.nc "$srcdir/ref_keyword1.cdl"
echo "*** creating tmp_keyword1.cdl from keyword1.nc..."
${NCDUMP} -h keyword1.nc > tmp_keyword1.cdl
echo "*** comparing tmp_keyword1.cdl to ref_keyword1.cdl..."
diff -b -w tmp_keyword1.cdl "$srcdir/ref_keyword1.cdl"

# Case 2: cdf5 format (-5); ref_keyword2.cdl uses "string" as a dimension
# and variable name.
echo "*** cdf5: creating keyword2.nc from ref_keyword2.cdl..."
${NCGEN} -5 -lb -o keyword2.nc "$srcdir/ref_keyword2.cdl"
echo "*** creating tmp_keyword2.cdl from keyword2.nc..."
${NCDUMP} -h keyword2.nc > tmp_keyword2.cdl
echo "*** comparing tmp_keyword2.cdl to ref_keyword2.cdl..."
diff -b -w tmp_keyword2.cdl "$srcdir/ref_keyword2.cdl"

exit 0

View File

@ -139,6 +139,35 @@ struct Specialtoken specials[] = {
{NULL,0} /* null terminate */
};
/* Track type keywords that may instead be treated as identifiers,
depending on the format being produced. */
/* Define the possible format classes. Each mask has bit (1<<NC_FORMAT_xxx)
set for every output format in which the keyword is a real keyword. */
#define KWALL (1<<NC_FORMAT_CLASSIC|1<<NC_FORMAT_64BIT_OFFSET|1<<NC_FORMAT_NETCDF4|1<<NC_FORMAT_NETCDF4_CLASSIC|1<<NC_FORMAT_64BIT_DATA) /* Used in all formats */
#define KWCDF5 (1<<NC_FORMAT_64BIT_DATA) /* Used in cdf5 */
#define KWNC4 (1<<NC_FORMAT_NETCDF4) /* Used in netcdf-4 */
/* Number of entries in kwident[]; must match the initializer below */
#define NKWIDENT 12
struct KWIDENT {
int token; /* Parser token code for the keyword */
const char* keyword; /* Canonical spelling of the keyword */
int formats; /* Which formats use this keyword */
} kwident[NKWIDENT] = {
/* Order by token for binary search. */
/* NOTE(review): this assumes the parser assigns these tokens increasing
values in exactly this order -- confirm against the grammar's %token
declarations. */
{CHAR_K, "char", KWALL},
{BYTE_K, "byte", KWALL},
{SHORT_K, "short", KWALL},
{INT_K, "int", KWALL},
{FLOAT_K, "float", KWALL},
{DOUBLE_K, "double", KWALL},
{UBYTE_K, "ubyte", KWCDF5|KWNC4},
{USHORT_K, "ushort", KWCDF5|KWNC4},
{UINT_K, "uint", KWCDF5|KWNC4},
{INT64_K, "int64", KWCDF5|KWNC4},
{UINT64_K, "uint64", KWCDF5|KWNC4},
{STRING_K, "string", KWNC4}
};
/* Map a keyword token to IDENT when the output format does not use it */
static int identorkw(int token);
%}
%x ST_C_COMMENT
%x TEXT
@ -260,26 +289,27 @@ yytext[MAXTRST-1] = '\0';
return lexdebug(OPAQUESTRING);
}
compound|struct|structure {return lexdebug(COMPOUND);}
enum {return lexdebug(ENUM);}
compound|struct|structure {return lexdebug(identorkw(COMPOUND));}
enum {return lexdebug(identorkw(ENUM));}
opaque {return lexdebug(OPAQUE_);}
float|real {return lexdebug(FLOAT_K);}
char {return lexdebug(CHAR_K);}
byte {return lexdebug(BYTE_K);}
ubyte {return lexdebug(UBYTE_K);}
short {return lexdebug(SHORT_K);}
ushort {return lexdebug(USHORT_K);}
long|int|integer {return lexdebug(INT_K);}
ulong|uint|uinteger {return lexdebug(UINT_K);}
int64 {return lexdebug(INT64_K);}
uint64 {return lexdebug(UINT64_K);}
double {return lexdebug(DOUBLE_K);}
string {return lexdebug(STRING_K);}
float|real {return lexdebug(identorkw(FLOAT_K));}
char {return lexdebug(identorkw(CHAR_K));}
byte {return lexdebug(identorkw(BYTE_K));}
ubyte {return lexdebug(identorkw(UBYTE_K));}
short {return lexdebug(identorkw(SHORT_K));}
ushort {return lexdebug(identorkw(USHORT_K));}
long|int|integer {return lexdebug(identorkw(INT_K));}
ulong|uint|uinteger {return lexdebug(identorkw(UINT_K));}
int64 {return lexdebug(identorkw(INT64_K));}
uint64 {return lexdebug(identorkw(UINT64_K));}
double {return lexdebug(identorkw(DOUBLE_K));}
string {return lexdebug(identorkw(STRING_K));}
unlimited|UNLIMITED {int32_val = -1;
return lexdebug(NC_UNLIMITED_K);}
return lexdebug(identorkw(NC_UNLIMITED_K));}
/* These are currently only keywords */
types: {return lexdebug(TYPES);}
dimensions: {return lexdebug(DIMENSIONS);}
variables: {return lexdebug(VARIABLES);}
@ -875,3 +905,39 @@ collecttag(char* text, char** stagp)
}
return tag;
}
/* Depending on the format, a name may be a keword or an ident */
static int
identorkw(int token)
{
/* Binary search for yytext */
int n = NKWIDENT;
int L = 0;
int R = (n - 1);
int m, cmp;
struct KWIDENT* p;
int found = 0;
size_t len;
char* id = NULL;
for(;;) {
if(L > R) break;
m = (L + R) / 2;
p = &kwident[m];
cmp = (p->token - token);
if(cmp == 0) {found = 1; break;}
if(cmp < 0)
L = (m + 1);
else /*cmp > 0*/
R = (m - 1);
}
if(!found) return token; /* Not a keyword of interest */
/* See if the format applies */
if(p->formats & ((int)1<<k_flag)) return token;
/* Need to convert a non-ident token to an ident symbol */
len = strlen(yytext);
len = unescape(yytext,len,ISIDENT,&id);
yylval.sym = install(id);
efree(id);
return IDENT; /* treat as identifier */
}

View File

@ -217,7 +217,7 @@ NCConstant* constant;
attrdecl enumid path dimref fielddim fieldspec
%type <sym> typeref
%type <sym> varref
%type <sym> type_var_ref
%type <sym> ambiguous_ref
%type <mark> enumidlist fieldlist fields varlist dimspec dimlist field
fielddimspec fielddimlist
%type <constant> dataitem constdata constint conststring constbool
@ -670,7 +670,7 @@ fielddim:
/* Use this when referencing defined objects */
varref:
type_var_ref
ambiguous_ref
{Symbol* vsym = $1;
if(vsym->objectclass != NC_VAR) {
derror("Undefined or forward referenced variable: %s",vsym->name);
@ -681,7 +681,7 @@ varref:
;
typeref:
type_var_ref
ambiguous_ref
{Symbol* tsym = $1;
if(tsym->objectclass != NC_TYPE) {
derror("Undefined or forward referenced type: %s",tsym->name);
@ -691,7 +691,7 @@ typeref:
}
;
type_var_ref:
ambiguous_ref:
path
{Symbol* tvsym = $1; Symbol* sym;
/* disambiguate*/
@ -729,7 +729,7 @@ attrdecl:
{$$ = makespecial(_SUPERBLOCK_FLAG,NULL,NULL,(void*)$4,ISCONST);}
| ':' ident '=' datalist
{ $$=makeattribute($2,NULL,NULL,$4,ATTRGLOBAL);}
| typeref type_var_ref ':' ident '=' datalist
| typeref ambiguous_ref ':' ident '=' datalist
{Symbol* tsym = $1; Symbol* vsym = $2; Symbol* asym = $4;
if(vsym->objectclass == NC_VAR) {
$$=makeattribute(asym,vsym,tsym,$6,ATTRVAR);
@ -738,7 +738,7 @@ attrdecl:
YYABORT;
}
}
| type_var_ref ':' ident '=' datalist
| ambiguous_ref ':' ident '=' datalist
{Symbol* sym = $1; Symbol* asym = $3;
if(sym->objectclass == NC_VAR) {
$$=makeattribute(asym,sym,NULL,$5,ATTRVAR);
@ -749,25 +749,25 @@ attrdecl:
YYABORT;
}
}
| type_var_ref ':' _FILLVALUE '=' datalist
| ambiguous_ref ':' _FILLVALUE '=' datalist
{$$ = makespecial(_FILLVALUE_FLAG,$1,NULL,(void*)$5,ISLIST);}
| typeref type_var_ref ':' _FILLVALUE '=' datalist
| typeref ambiguous_ref ':' _FILLVALUE '=' datalist
{$$ = makespecial(_FILLVALUE_FLAG,$2,$1,(void*)$6,ISLIST);}
| type_var_ref ':' _STORAGE '=' conststring
| ambiguous_ref ':' _STORAGE '=' conststring
{$$ = makespecial(_STORAGE_FLAG,$1,NULL,(void*)$5,ISCONST);}
| type_var_ref ':' _CHUNKSIZES '=' intlist
| ambiguous_ref ':' _CHUNKSIZES '=' intlist
{$$ = makespecial(_CHUNKSIZES_FLAG,$1,NULL,(void*)$5,ISLIST);}
| type_var_ref ':' _FLETCHER32 '=' constbool
| ambiguous_ref ':' _FLETCHER32 '=' constbool
{$$ = makespecial(_FLETCHER32_FLAG,$1,NULL,(void*)$5,ISCONST);}
| type_var_ref ':' _DEFLATELEVEL '=' constint
| ambiguous_ref ':' _DEFLATELEVEL '=' constint
{$$ = makespecial(_DEFLATE_FLAG,$1,NULL,(void*)$5,ISCONST);}
| type_var_ref ':' _SHUFFLE '=' constbool
| ambiguous_ref ':' _SHUFFLE '=' constbool
{$$ = makespecial(_SHUFFLE_FLAG,$1,NULL,(void*)$5,ISCONST);}
| type_var_ref ':' _ENDIANNESS '=' conststring
| ambiguous_ref ':' _ENDIANNESS '=' conststring
{$$ = makespecial(_ENDIAN_FLAG,$1,NULL,(void*)$5,ISCONST);}
| type_var_ref ':' _FILTER '=' conststring
| ambiguous_ref ':' _FILTER '=' conststring
{$$ = makespecial(_FILTER_FLAG,$1,NULL,(void*)$5,ISCONST);}
| type_var_ref ':' _NOFILL '=' constbool
| ambiguous_ref ':' _NOFILL '=' constbool
{$$ = makespecial(_NOFILL_FLAG,$1,NULL,(void*)$5,ISCONST);}
| ':' _FORMAT '=' conststring
{$$ = makespecial(_FORMAT_FLAG,NULL,NULL,(void*)$4,ISCONST);}

File diff suppressed because it is too large Load Diff

View File

@ -677,7 +677,7 @@ static const char *const yytname[] =
"dim_or_attr_decl", "dimdeclist", "dimdecl", "dimd", "vasection",
"vadecls", "vadecl_or_attr", "vardecl", "varlist", "varspec", "dimspec",
"dimlist", "dimref", "fieldlist", "fieldspec", "fielddimspec",
"fielddimlist", "fielddim", "varref", "typeref", "type_var_ref",
"fielddimlist", "fielddim", "varref", "typeref", "ambiguous_ref",
"attrdecllist", "attrdecl", "path", "datasection", "datadecls",
"datadecl", "datalist", "datalist0", "datalist1", "dataitem",
"constdata", "econstref", "function", "arglist", "simpleconstant",