/* This is part of the netCDF package. Copyright 2018 University Corporation for Atmospheric Research/Unidata. See COPYRIGHT file for conditions of use. This is a very simple example which writes a netCDF file with Unicode names encoded with UTF-8. It is the NETCDF3 equivalent of tst_unicode.c. Flow of main(): create the file, define a dimension and an NC_CHAR variable that share one UTF-8 name, attach a text attribute holding the same bytes, write those bytes as the variable's data, close, reopen read-only, then check that the variable name comes back in normalized form while the attribute value comes back exactly as written. NOTE(review): in this copy the five #include directives that follow "config.h" carry no header name; presumably they supply printf, strncmp, the nc_ functions and the ERR / SUMMARIZE_ERR / FINAL_RESULTS macros (none of which are defined in this file) - restore the header names before building. $Id: tst_utf8.c,v 1.10 2008/10/20 01:48:07 ed Exp $ */ #include "config.h" #include #include #include #include #include /* The data file we will create (and later reopen). Of the constants that follow, UNITS is the attribute name, NDIMS the rank of the variable, and NX the dimension length; NX equals sizeof name_utf8 (17 encoded bytes plus the trailing 0x00). NOTE(review): nc_put_var_text below is handed only the name_utf8 array, so NX presumably must not exceed that array's size - confirm against the netCDF API. */ #define FILE7_NAME "tst_utf8.nc" #define UNITS "units" #define NDIMS 1 #define NX 18 int main(int argc, char **argv) { int ncid, dimid, varid; int dimids[NDIMS]; /* Un-normalized UTF-8 encoding of the Unicode 8-character "Hello" in Greek. The sixth character is stored as the 3-byte sequence E1 BD B3 rather than the 2-byte CE AD used in the normalized array further down. */ unsigned char name_utf8[] = { 0xCE, 0x9A, /* GREEK CAPITAL LETTER KAPPA : 2-bytes utf8 */ 0xCE, 0xB1, /* GREEK SMALL LETTER ALPHA : 2-bytes utf8 */ 0xCE, 0xBB, /* GREEK SMALL LETTER LAMBDA : 2-bytes utf8 */ 0xCE, 0xB7, /* GREEK SMALL LETTER ETA : 2-bytes utf8 */ 0xCE, 0xBC, /* GREEK SMALL LETTER MU : 2-bytes utf8 */ 0xE1, 0xBD, 0xB3, /* GREEK SMALL LETTER EPSILON WITH OXIA (U+1F73) : 3-bytes utf8 */ 0xCF, 0x81, /* GREEK SMALL LETTER RHO : 2-bytes utf8 */ 0xCE, 0xB1, /* GREEK SMALL LETTER ALPHA : 2-bytes utf8 */ 0x00 }; /* Name used for dimension, variable, attribute value, and variable data. UNAMELEN is sizeof the array, so it counts the trailing 0x00 byte. */ #define UNAME ((char *) name_utf8) #define UNAMELEN (sizeof name_utf8) char name_in[UNAMELEN + 1], strings_in[UNAMELEN + 1]; nc_type att_type; size_t att_len; printf("\n*** Testing UTF-8.\n"); printf("*** creating UTF-8 test file %s...", FILE7_NAME); if (nc_create(FILE7_NAME, NC_CLOBBER, &ncid)) ERR; /* Define dimension of length NX with Unicode UTF-8 encoded name */ if (nc_def_dim(ncid, UNAME, NX, &dimid)) ERR; dimids[0] = dimid; /* Define a one-dimensional NC_CHAR variable with the same name as the dimension */ if (nc_def_var(ncid, UNAME, NC_CHAR, NDIMS, dimids, &varid)) ERR; /* Create string attribute with same value; the length passed is UNAMELEN, so the trailing 0x00 byte is stored as part of the attribute */ if (nc_put_att_text(ncid, varid, UNITS, UNAMELEN, UNAME)) ERR; if (nc_enddef(ncid)) ERR; /* Write string data, UTF-8 encoded, to the file (define mode was ended by nc_enddef just above) */ if 
(nc_put_var_text(ncid, varid, UNAME)) ERR; if (nc_close(ncid)) ERR; /* Verification phase: everything below only reads the file back. */ /* Reopen the file read-only, look the variable up using the original un-normalized name, then fetch the name as stored. */ if (nc_open(FILE7_NAME, NC_NOWRITE, &ncid)) ERR; if (nc_inq_varid(ncid, UNAME, &varid)) ERR; if (nc_inq_varname(ncid, varid, name_in)) ERR; { /* Note, name was normalized before storing, so retrieved name won't match original unnormalized name. Check that we get normalized version, instead. NNAMELEN is sizeof norm_utf8, so the strncmp below also covers the terminating 0x00. */ /* NFC normalized UTF-8 for Unicode 8-character "Hello" in Greek; differs from name_utf8 only in the sixth character (CE AD instead of E1 BD B3) */ unsigned char norm_utf8[] = { 0xCE, 0x9A, /* GREEK CAPITAL LETTER KAPPA : 2-bytes utf8 */ 0xCE, 0xB1, /* GREEK SMALL LETTER ALPHA : 2-bytes utf8 */ 0xCE, 0xBB, /* GREEK SMALL LETTER LAMBDA : 2-bytes utf8 */ 0xCE, 0xB7, /* GREEK SMALL LETTER ETA : 2-bytes utf8 */ 0xCE, 0xBC, /* GREEK SMALL LETTER MU : 2-bytes utf8 */ 0xCE, 0xAD, /* GREEK SMALL LETTER EPSILON WITH TONOS (U+03AD) : 2-bytes utf8 */ 0xCF, 0x81, /* GREEK SMALL LETTER RHO : 2-bytes utf8 */ 0xCE, 0xB1, /* GREEK SMALL LETTER ALPHA : 2-bytes utf8 */ 0x00 }; #define NNAME ((char *) norm_utf8) #define NNAMELEN (sizeof norm_utf8) if (strncmp(NNAME, name_in, NNAMELEN)) ERR; } if (nc_inq_att(ncid, varid, UNITS, &att_type, &att_len)) ERR; if (att_type != NC_CHAR || att_len != UNAMELEN) ERR; /* We don't normalize data or attribute values, so get exactly what was put. att_len was compared against UNAMELEN just above, and strings_in holds UNAMELEN + 1 bytes, which leaves room for the terminator stored at strings_in[att_len]. NOTE(review): if ERR does not stop execution, a mismatched att_len is still used for the read and as the index - confirm ERR's definition. */ if (nc_get_att_text(ncid, varid, UNITS, strings_in)) ERR; strings_in[att_len] = '\0'; if (strncmp(UNAME, strings_in, UNAMELEN)) ERR; if (nc_close(ncid)) ERR; SUMMARIZE_ERR; FINAL_RESULTS; }