netcdf-c/ncdump/tst_utf8.c

129 lines
4.0 KiB
C
Raw Normal View History

2010-06-03 21:24:43 +08:00
/* This is part of the netCDF package.
Copyright 2018 University Corporation for Atmospheric Research/Unidata.
See COPYRIGHT file for conditions of use.
This is a very simple example which writes a netCDF file with
Unicode names encoded with UTF-8. It is the NETCDF3 equivalent
of tst_unicode.c
$Id: tst_utf8.c,v 1.10 2008/10/20 01:48:07 ed Exp $
*/
Primary change: add dap4 support Specific changes: 1. Add dap4 code: libdap4 and dap4_test. Note that until the d4ts server problem is solved, dap4 is turned off. 2. Modify various files to support dap4 flags: configure.ac, Makefile.am, CMakeLists.txt, etc. 3. Add nc_test/test_common.sh. This centralizes the handling of the locations of various things in the build tree: e.g. where is ncgen.exe located. See nc_test/test_common.sh for details. 4. Modify .sh files to use test_common.sh 5. Obsolete separate oc2 by moving it to be part of netcdf-c. This means replacing code with netcdf-c equivalents. 5. Add --with-testserver to configure.ac to allow override of the servers to be used for --enable-dap-remote-tests. 6. There were multiple versions of nctypealignment code. Try to centralize in libdispatch/doffset.c and include/ncoffsets.h 7. Add a unit test for the ncuri code because of its complexity. 8. Move the findserver code out of libdispatch and into a separate, self contained program in ncdap_test and dap4_test. 9. Move the dispatch header files (nc{3,4}dispatch.h) to .../include because they are now shared by modules. 10. Revamp the handling of TOPSRCDIR and TOPBUILDDIR for shell scripts. 11. Make use of MREMAP if available 12. Misc. minor changes e.g. - #include <config.h> -> #include "config.h" - Add some no-install headers to /include - extern -> EXTERNL and vice versa as needed - misc header cleanup - clean up checking for misc. unix vs microsoft functions 13. Change copyright decls in some files to point to LICENSE file. 14. Add notes to RELEASENOTES.md
2017-03-09 08:01:10 +08:00
#include "config.h"
2010-06-03 21:24:43 +08:00
#include <stdlib.h>
#include <nc_tests.h>
#include <err_macros.h>
2010-06-03 21:24:43 +08:00
#include <netcdf.h>
#include <string.h>
/* The data file we will create. */
#define FILE7_NAME "tst_utf8.nc"
#define UNITS "units"
#define NDIMS 1
#define NX 18
int
main(int argc, char **argv)
{
int ncid, dimid, varid;
int dimids[NDIMS];
/* (unnormalized) UTF-8 encoding for Unicode 8-character "Hello" in Greek */
unsigned char name_utf8[] = {
0xCE, 0x9A, /* GREEK CAPITAL LETTER KAPPA : 2-bytes utf8 */
0xCE, 0xB1, /* GREEK SMALL LETTER LAMBDA : 2-bytes utf8 */
0xCE, 0xBB, /* GREEK SMALL LETTER ALPHA : 2-bytes utf8 */
0xCE, 0xB7, /* GREEK SMALL LETTER ETA : 2-bytes utf8 */
0xCE, 0xBC, /* GREEK SMALL LETTER MU : 2-bytes utf8 */
0xE1, 0xBD, 0xB3, /* GREEK SMALL LETTER EPSILON
WITH TONOS : 3-bytes utf8 */
0xCF, 0x81, /* GREEK SMALL LETTER RHO : 2-bytes utf8 */
0xCE, 0xB1, /* GREEK SMALL LETTER ALPHA : 2-bytes utf8 */
0x00
};
/* Name used for dimension, variable, and attribute value */
#define UNAME ((char *) name_utf8)
#define UNAMELEN (sizeof name_utf8)
char name_in[UNAMELEN + 1], strings_in[UNAMELEN + 1];
nc_type att_type;
size_t att_len;
2010-06-03 21:24:43 +08:00
printf("\n*** Testing UTF-8.\n");
printf("*** creating UTF-8 test file %s...", FILE7_NAME);
if (nc_create(FILE7_NAME, NC_CLOBBER, &ncid))
2010-06-03 21:24:43 +08:00
ERR;
/* Define dimension with Unicode UTF-8 encoded name */
if (nc_def_dim(ncid, UNAME, NX, &dimid))
2010-06-03 21:24:43 +08:00
ERR;
dimids[0] = dimid;
/* Define variable with same name */
if (nc_def_var(ncid, UNAME, NC_CHAR, NDIMS, dimids, &varid))
2010-06-03 21:24:43 +08:00
ERR;
/* Create string attribute with same value */
if (nc_put_att_text(ncid, varid, UNITS, UNAMELEN, UNAME))
2010-06-03 21:24:43 +08:00
ERR;
if (nc_enddef(ncid))
2010-06-03 21:24:43 +08:00
ERR;
/* Write string data, UTF-8 encoded, to the file */
if (nc_put_var_text(ncid, varid, UNAME))
2010-06-03 21:24:43 +08:00
ERR;
if (nc_close(ncid))
2010-06-03 21:24:43 +08:00
ERR;
/* Check it out. */
2010-06-03 21:24:43 +08:00
/* Reopen the file. */
if (nc_open(FILE7_NAME, NC_NOWRITE, &ncid))
2010-06-03 21:24:43 +08:00
ERR;
if (nc_inq_varid(ncid, UNAME, &varid))
2010-06-03 21:24:43 +08:00
ERR;
if (nc_inq_varname(ncid, varid, name_in))
2010-06-03 21:24:43 +08:00
ERR;
{
/* Note, name was normalized before storing, so retrieved name
won't match original unnormalized name. Check that we get
normalized version, instead. */
2010-06-03 21:24:43 +08:00
/* NFC normalized UTF-8 for Unicode 8-character "Hello" in Greek */
unsigned char norm_utf8[] = {
0xCE, 0x9A, /* GREEK CAPITAL LETTER KAPPA : 2-bytes utf8 */
0xCE, 0xB1, /* GREEK SMALL LETTER LAMBDA : 2-bytes utf8 */
0xCE, 0xBB, /* GREEK SMALL LETTER ALPHA : 2-bytes utf8 */
0xCE, 0xB7, /* GREEK SMALL LETTER ETA : 2-bytes utf8 */
0xCE, 0xBC, /* GREEK SMALL LETTER MU : 2-bytes utf8 */
0xCE, 0xAD, /* GREEK SMALL LETTER EPSILON WITH TONOS
2010-06-03 21:24:43 +08:00
: 2-bytes utf8 */
0xCF, 0x81, /* GREEK SMALL LETTER RHO : 2-bytes utf8 */
0xCE, 0xB1, /* GREEK SMALL LETTER ALPHA : 2-bytes utf8 */
0x00
};
#define NNAME ((char *) norm_utf8)
#define NNAMELEN (sizeof norm_utf8)
if (strncmp(NNAME, name_in, NNAMELEN))
2010-06-03 21:24:43 +08:00
ERR;
}
if (nc_inq_att(ncid, varid, UNITS, &att_type, &att_len))
2010-06-03 21:24:43 +08:00
ERR;
if (att_type != NC_CHAR || att_len != UNAMELEN)
2010-06-03 21:24:43 +08:00
ERR;
/* We don't normalize data or attribute values, so get exactly what was put */
if (nc_get_att_text(ncid, varid, UNITS, strings_in))
2010-06-03 21:24:43 +08:00
ERR;
strings_in[att_len] = '\0';
if (strncmp(UNAME, strings_in, UNAMELEN))
2010-06-03 21:24:43 +08:00
ERR;
if (nc_close(ncid))
2010-06-03 21:24:43 +08:00
ERR;
SUMMARIZE_ERR;
FINAL_RESULTS;
}