mirror of
https://github.com/Unidata/netcdf-c.git
synced 2025-03-25 17:40:27 +08:00
[NCF-293]
Allow .cdl files to have a leading utf-8 BOM. Also add test.
This commit is contained in:
parent
deeca5fb83
commit
baade3e4fc
@ -624,7 +624,7 @@ AC_HEADER_STDBOOL
|
||||
# Check for these functions...
|
||||
AC_CHECK_FUNCS([strlcat strerror snprintf strchr strrchr strcat strcpy \
|
||||
strdup strcasecmp strtod strtoll strtoull strstr \
|
||||
mkstemp rand \
|
||||
mkstemp rand memcmp \
|
||||
getrlimit gettimeofday fsync MPI_Comm_f2c])
|
||||
|
||||
# Does the user want to use NC_DISKLESS?
|
||||
|
@ -37,7 +37,7 @@ TARGET_LINK_LIBRARIES(ncdump netcdf ${ALL_TLL_LIBS})
|
||||
TARGET_LINK_LIBRARIES(nccopy netcdf ${ALL_TLL_LIBS})
|
||||
|
||||
IF(ENABLE_TESTS)
|
||||
ADD_EXECUTABLE(rewrite-scalar rewrite-scalar.c)
|
||||
ADD_EXECUTABLE(rewrite-scalar rewrite-scalar.c)
# bom.c has its own main(); build it as a separate helper executable
# (tst_bom.sh invokes it as ./bom) instead of linking it into rewrite-scalar.
ADD_EXECUTABLE(bom bom.c)
|
||||
TARGET_LINK_LIBRARIES(rewrite-scalar netcdf)
|
||||
# Base tests
|
||||
# The tests are set up as a combination of shell scripts and executables that
|
||||
@ -58,8 +58,8 @@ IF(ENABLE_TESTS)
|
||||
add_sh_test(ncdump tst_charfill)
|
||||
add_sh_test(ncdump tst_iter)
|
||||
add_sh_test(ncdump tst_formatx3)
|
||||
add_sh_test(ncdump tst_bom)
|
||||
|
||||
|
||||
IF(EXTRA_TESTS)
|
||||
add_sh_test(ncdump run_back_comp_tests)
|
||||
ENDIF()
|
||||
|
@ -28,10 +28,10 @@ man_MANS = ncdump.1 nccopy.1
|
||||
if BUILD_TESTSETS
|
||||
#if !BUILD_DLL
|
||||
# These tests are run for both netCDF-4 and non-netCDF-4 builds.
|
||||
check_PROGRAMS = rewrite-scalar ctest ctest64 ncdump tst_utf8
|
||||
check_PROGRAMS = rewrite-scalar ctest ctest64 ncdump tst_utf8 bom
|
||||
TESTS = run_tests.sh tst_64bit.sh ctest ctest64 tst_output.sh \
|
||||
tst_lengths.sh tst_calendars.sh tst_utf8 run_utf8_tests.sh \
|
||||
tst_nccopy3.sh tst_charfill.sh tst_iter.sh tst_formatx3.sh
|
||||
tst_nccopy3.sh tst_charfill.sh tst_iter.sh tst_formatx3.sh tst_bom.sh
|
||||
|
||||
if LARGE_FILE_TESTS
|
||||
TESTS += tst_iter.sh
|
||||
|
33
ncdump/bom.c
Normal file
33
ncdump/bom.c
Normal file
@ -0,0 +1,33 @@
|
||||
/*********************************************************************
|
||||
* Copyright 1993, UCAR/Unidata
|
||||
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
|
||||
*********************************************************************/
|
||||
|
||||
#include <config.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* BOM (byte-order mark) sequences.
 * The UTF-32 forms contain NUL bytes, so they must always be used with an
 * explicit length (3, 4, 4, 2, 2 bytes respectively), never with strlen().
 */
static char* U8 = "\xEF\xBB\xBF"; /* UTF-8 */
static char* BE32 = "\x00\x00\xFE\xFF"; /* UTF-32; big-endian */
static char* LE32 = "\xFF\xFE\x00\x00"; /* UTF-32; little-endian */
static char* BE16 = "\xFE\xFF"; /* UTF-16; big-endian */
static char* LE16 = "\xFF\xFE"; /* UTF-16; little-endian */
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
char* bom = U8;
|
||||
int bomlen = 3;
|
||||
if(argc > 1 && strlen(argv[1]) > 0) {
|
||||
char* which = argv[1];
|
||||
switch (which[0]) {
|
||||
case '1': bom = BE16; bomlen = 2; break;
|
||||
case '3': bom = BE32; bomlen = 2; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
fwrite(bom,1,bomlen,stdout);
|
||||
exit(0);
|
||||
}
|
||||
|
54
ncdump/tst_bom.sh
Normal file
54
ncdump/tst_bom.sh
Normal file
@ -0,0 +1,54 @@
|
||||
#!/bin/sh
# This shell script tests BOM support in ncgen:
#   1. a .cdl file with a leading UTF-8 BOM must be accepted;
#   2. a .cdl file with a leading UTF-16 (big-endian) BOM must be rejected.
# The BOM bytes are produced by the ./bom helper program.

set -e

if test "x$srcdir" = "x"; then
    # Quote $0 so a path containing spaces does not split into several words
    srcdir=`dirname "$0"`;
fi
# add hack for sunos
export srcdir;

echo ""

rm -f tst_bom.cdl tmp.cdl tst_bom8.* tst_bom16.*

# Reference CDL without any BOM. Truncate rather than append so that a
# stale file can never contaminate the reference.
cat >tst_bom.cdl <<EOF
netcdf tst_bom {
variables:
float f;
data:

f = 1;
}
EOF

echo "*** Generate a cdl file with leading UTF-8 BOM."
./bom 8 >tst_bom8.cdl
cat tst_bom.cdl >> tst_bom8.cdl

echo "*** Verify .nc file"
../ncgen/ncgen -k1 -o tst_bom8.nc tst_bom8.cdl
../ncdump/ncdump -n tst_bom tst_bom8.nc > tmp.cdl
# -w: ignore whitespace differences between the reference and ncdump's output
diff -w tst_bom.cdl tmp.cdl

# Do it again but with Big-Endian 16; should fail

rm -f tmp.cdl tst_bom8.* tst_bom16.*

echo "*** Generate a cdl file with leading UTF-16 BOM."
./bom 16 >tst_bom16.cdl
cat tst_bom.cdl >> tst_bom16.cdl

echo "*** Verify UTF-16 file fails"
# ncgen runs as an 'if' condition, so its expected non-zero exit status
# does not trip 'set -e'.
if ../ncgen/ncgen -k1 -o tst_bom16.nc tst_bom16.cdl ; then
    echo 'BOM Big Endian 16 succeeded, but should not'
    exit 1
else
    echo '***XFAIL: BOM Big Endian 16'
fi

# Cleanup
rm -f tst_bom.cdl tmp.cdl tst_bom8.* tst_bom16.*

exit 0
|
32
ncgen/main.c
32
ncgen/main.c
@ -124,6 +124,13 @@ struct Languages legallanguages[] = {
|
||||
};
|
||||
#endif
|
||||
|
||||
/* BOM (byte-order mark) sequences.
 * The UTF-32 forms contain NUL bytes, so they must always be used with an
 * explicit length (3, 4, 4, 2, 2 bytes respectively), never with strlen().
 */
static char* U8 = "\xEF\xBB\xBF"; /* UTF-8 */
static char* BE32 = "\x00\x00\xFE\xFF"; /* UTF-32; big-endian */
static char* LE32 = "\xFF\xFE\x00\x00"; /* UTF-32; little-endian */
static char* BE16 = "\xFE\xFF"; /* UTF-16; big-endian */
static char* LE16 = "\xFF\xFE"; /* UTF-16; little-endian */
|
||||
|
||||
/* The default minimum iterator size depends
|
||||
on whether we are doing binary or language
|
||||
based output.
|
||||
@ -371,11 +378,36 @@ main(
|
||||
|
||||
fp = stdin;
|
||||
if (argc > 0 && strcmp(argv[0], "-") != 0) {
|
||||
char bom[4];
|
||||
size_t count;
|
||||
if ((fp = fopen(argv[0], "r")) == NULL) {
|
||||
derror ("can't open file %s for reading: ", argv[0]);
|
||||
perror("");
|
||||
return(7);
|
||||
}
|
||||
/* Check the leading bytes for an occurrence of a BOM */
|
||||
/* re: http://www.unicode.org/faq/utf_bom.html#BOM */
|
||||
/* Attempt to read the first two bytes; the switch below classifies the input by its first byte */
|
||||
memset(bom,0,sizeof(bom));
|
||||
count = fread(bom,1,2,fp);
|
||||
if(count == 2) {
|
||||
switch (bom[0]) {
|
||||
case '\x00':
|
||||
case '\xFF':
|
||||
case '\xFE':
|
||||
/* Only UTF-8 is allowed; complain and exit */
|
||||
fprintf(stderr,"Input file contains a BOM indicating a non-UTF8 encoding\n");
|
||||
return 1;
|
||||
case '\xEF':
|
||||
/* skip the BOM */
|
||||
fread(bom,1,1,fp);
|
||||
break;
|
||||
default: /* legal printable char, presumably; rewind */
|
||||
rewind(fp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cdlname = (char*)emalloc(NC_MAX_NAME);
|
||||
cdlname = nulldup(argv[0]);
|
||||
if(strlen(cdlname) > NC_MAX_NAME) cdlname[NC_MAX_NAME] = '\0';
|
||||
|
Loading…
x
Reference in New Issue
Block a user