netcdf-c/libnczarr/zcvt.c
Dennis Heimbigner d953899559 Move to Version 2 NCZarr Extended Meta-Data
re: https://github.com/zarr-developers/zarr-specs/issues/41

After discussions with the Zarr community, it was decided to
convert to a new representation of the NCZarr meta-data extensions: version 2.
These extensions store information necessary for mapping the Zarr data model
to the netcdf-4 data model.

The basic change is to remove the NCZarr specific objects: .nczarr, .nczgroup, .nczarray, and .nczattr.
The contents of these objects are moved into the corresponding existing Zarr objects as special keys. The mapping is as follows:

* ''.nczarr'' => ''/.zgroup/_NCZARR_SUPERBLOCK_''
* ''.nczgroup'' => ''.zgroup/_NCZARR_GROUP_''
* ''.nczarray'' => ''.zarray/_NCZARR_ARRAY_''
* ''.nczattr'' => ''.zattr/_NCZARR_ATTR_''

Backward compatibility is maintained by looking for the object ''/.nczarr''
and if found, then assuming that the dataset is in the older version 1 format.
This compatibility only supports reading of such version 1 datasets.

Documentation and test cases are also added.

Misc. Other Changes:
1. The json parsing code was added to the general library instead of nczarr only (ncjson.c, ncjson.h).
2. Improved support for different platform paths by allowing conversion
   to a single common path representation.
3. Add some new error codes.
4. Modify nccopy usage to mention the new chunking specification.
2021-07-17 16:55:30 -06:00

395 lines
10 KiB
C

/*********************************************************************
* Copyright 2018, UCAR/Unidata
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*********************************************************************/
#include "zincludes.h"
#include <math.h>
#ifdef _MSC_VER
#include <crtdbg.h>
#endif
#include "isnan.h"
/*
Code taken directly from libdap4/d4cvt.c
*/
/*
 * Scratch area for conversions: a value is first widened into exactly one
 * of these members, and is then narrowed or formatted from that member.
 */
struct ZCVT {
    long long int64v;            /* holds signed integer values */
    unsigned long long uint64v;  /* holds unsigned integer values */
    double float64v;             /* holds floating point values */
};
/*
 * Convert one JSON scalar into a value of an atomic netCDF type.
 *
 * The text of jsrc is first parsed into one of three wide intermediates
 * (signed 64-bit, unsigned 64-bit, or double) and is then narrowed into
 * the destination type.
 *
 * jsrc    - JSON node of sort NCJ_INT, NCJ_DOUBLE, NCJ_STRING or
 *           NCJ_BOOLEAN; its string form supplies the value.
 * dsttype - one of NC_BYTE, NC_UBYTE, NC_SHORT, NC_USHORT, NC_INT,
 *           NC_UINT, NC_INT64, NC_UINT64, NC_FLOAT, NC_DOUBLE.
 * memory  - destination; the result is written through a pointer of
 *           the C type that corresponds to dsttype.
 *
 * Returns NC_NOERR on success, NC_EINVAL if the text cannot be parsed,
 * NC_EINTERNAL for an unsupported JSON sort or destination type, and
 * NC_ERANGE if the value does not fit the destination (a value is
 * still stored in that case).
 */
int
NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory)
{
    int stat = NC_NOERR;
    nc_type srctype;
    struct ZCVT zcvt;
    int outofrange = 0;

    /* Convert the incoming jsrc string to a restricted set of values */
    switch (jsrc->sort) {
    case NCJ_INT: /* convert to (u)int64 */
        if(NCJstring(jsrc)[0] == '-') {
            if(sscanf(NCJstring(jsrc),"%lld",&zcvt.int64v) != 1)
                {stat = NC_EINVAL; goto done;}
            srctype = NC_INT64;
        } else {
            if(sscanf(NCJstring(jsrc),"%llu",&zcvt.uint64v) != 1)
                {stat = NC_EINVAL; goto done;}
            srctype = NC_UINT64;
        }
        break;
    case NCJ_STRING:
    case NCJ_DOUBLE:
        /* Capture nan and infinity values */
        if(strcasecmp(NCJstring(jsrc),"nan")==0)
            zcvt.float64v = NAN;
        else if(strcasecmp(NCJstring(jsrc),"-nan")==0)
            zcvt.float64v = - NAN;
        else if(strcasecmp(NCJstring(jsrc),"infinity")==0)
            zcvt.float64v = INFINITY;
        else if(strcasecmp(NCJstring(jsrc),"-infinity")==0)
            zcvt.float64v = (- INFINITY);
        else {
            if(sscanf(NCJstring(jsrc),"%lg",&zcvt.float64v) != 1)
                {stat = NC_EINVAL; goto done;}
        }
        srctype = NC_DOUBLE;
        break;
    case NCJ_BOOLEAN:
        /* Anything other than "false" is treated as true */
        srctype = NC_UINT64;
        if(strcasecmp(NCJstring(jsrc),"false")==0)
            zcvt.uint64v = 0;
        else
            zcvt.uint64v = 1;
        break;
    default: stat = NC_EINTERNAL; goto done;
    }

    /*
     * A double headed for an integer type is narrowed to (u)int64 once,
     * here, with an explicit range test. Casting NaN, +/-Infinity or a
     * value outside the target range to an integer type is undefined
     * behavior in C, so such values are flagged and replaced by 0
     * instead of being cast.
     */
    if(srctype == NC_DOUBLE && dsttype != NC_FLOAT && dsttype != NC_DOUBLE) {
        double d = zcvt.float64v;
        /* Written so that NaN fails the test: [-2^63, 2^64) is representable */
        if(!(d >= -9223372036854775808.0 && d < 18446744073709551616.0)) {
            zcvt.int64v = 0;
            srctype = NC_INT64;
            outofrange = 1;
        } else if(d >= 9223372036854775808.0) {
            /* Only fits an unsigned 64-bit intermediate */
            zcvt.uint64v = (unsigned long long)d;
            srctype = NC_UINT64;
        } else {
            zcvt.int64v = (long long)d; /* truncates toward zero */
            srctype = NC_INT64;
        }
    }

    /* Now, do the down conversion into memory */
    switch (dsttype) {
    case NC_BYTE: {
        signed char* p = (signed char*)memory;
        switch (srctype) {
        case NC_INT64:
            if(zcvt.int64v < NC_MIN_BYTE || zcvt.int64v > NC_MAX_BYTE) outofrange = 1;
            *p = (signed char)zcvt.int64v;
            break;
        case NC_UINT64:
            if(zcvt.uint64v > NC_MAX_BYTE) outofrange = 1;
            *p = (signed char)zcvt.uint64v;
            break;
        default: break;
        }
    } break;
    case NC_UBYTE: {
        unsigned char* p = (unsigned char*)memory;
        switch (srctype) {
        case NC_INT64:
            /* Upper bound is the unsigned byte maximum, not the signed one */
            if(zcvt.int64v < 0 || zcvt.int64v > NC_MAX_UBYTE) outofrange = 1;
            *p = (unsigned char)zcvt.int64v;
            break;
        case NC_UINT64:
            if(zcvt.uint64v > NC_MAX_UBYTE) outofrange = 1;
            *p = (unsigned char)zcvt.uint64v;
            break;
        default: break;
        }
    } break;
    case NC_SHORT: {
        signed short* p = (signed short*)memory;
        switch (srctype) {
        case NC_INT64:
            if(zcvt.int64v < NC_MIN_SHORT || zcvt.int64v > NC_MAX_SHORT) outofrange = 1;
            *p = (signed short)zcvt.int64v;
            break;
        case NC_UINT64:
            if(zcvt.uint64v > NC_MAX_SHORT) outofrange = 1;
            *p = (signed short)zcvt.uint64v;
            break;
        default: break;
        }
    } break;
    case NC_USHORT: {
        unsigned short* p = (unsigned short*)memory;
        switch (srctype) {
        case NC_INT64:
            if(zcvt.int64v < 0 || zcvt.int64v > NC_MAX_USHORT) outofrange = 1;
            *p = (unsigned short)zcvt.int64v;
            break;
        case NC_UINT64:
            if(zcvt.uint64v > NC_MAX_USHORT) outofrange = 1;
            *p = (unsigned short)zcvt.uint64v;
            break;
        default: break;
        }
    } break;
    case NC_INT: {
        signed int* p = (signed int*)memory;
        switch (srctype) {
        case NC_INT64:
            if(zcvt.int64v < NC_MIN_INT || zcvt.int64v > NC_MAX_INT) outofrange = 1;
            *p = (signed int)zcvt.int64v;
            break;
        case NC_UINT64:
            if(zcvt.uint64v > NC_MAX_INT) outofrange = 1;
            *p = (signed int)zcvt.uint64v;
            break;
        default: break;
        }
    } break;
    case NC_UINT: {
        unsigned int* p = (unsigned int*)memory;
        switch (srctype) {
        case NC_INT64:
            if(zcvt.int64v < 0 || zcvt.int64v > NC_MAX_UINT) outofrange = 1;
            *p = (unsigned int)zcvt.int64v;
            break;
        case NC_UINT64:
            if(zcvt.uint64v > NC_MAX_UINT) outofrange = 1;
            *p = (unsigned int)zcvt.uint64v;
            break;
        default: break;
        }
    } break;
    case NC_INT64: {
        signed long long* p = (signed long long*)memory;
        switch (srctype) {
        case NC_INT64:
            *p = (signed long long)zcvt.int64v;
            break;
        case NC_UINT64:
            if(zcvt.uint64v > NC_MAX_INT64) outofrange = 1;
            *p = (signed long long)zcvt.uint64v;
            break;
        default: break;
        }
    } break;
    case NC_UINT64: {
        unsigned long long* p = (unsigned long long*)memory;
        switch (srctype) {
        case NC_INT64:
            if(zcvt.int64v < 0) outofrange = 1;
            *p = (unsigned long long)zcvt.int64v;
            break;
        case NC_UINT64:
            *p = (unsigned long long)zcvt.uint64v;
            break;
        default: break;
        }
    } break;
    case NC_FLOAT: {
        float* p = (float*)memory;
        switch (srctype) {
        case NC_DOUBLE:
            *p = (float)zcvt.float64v;
            break;
        case NC_INT64:
            *p = (float)zcvt.int64v;
            break;
        case NC_UINT64:
            *p = (float)zcvt.uint64v;
            break;
        default: break;
        }
    } break;
    case NC_DOUBLE: {
        double* p = (double*)memory;
        switch (srctype) {
        case NC_DOUBLE:
            *p = (double)zcvt.float64v;
            break;
        case NC_INT64:
            *p = (double)zcvt.int64v;
            break;
        case NC_UINT64:
            *p = (double)zcvt.uint64v;
            break;
        default: break;
        }
    } break;
    default: stat = NC_EINTERNAL; goto done;
    }

done:
    /* A range problem is reported only if nothing more serious happened */
    if(stat == NC_NOERR && outofrange) stat = NC_ERANGE;
    return stat;
}
/*
 * Render one atomic value as freshly allocated decimal text.
 *
 * srctype - type of the value at src; the assert rejects NC_CHAR,
 *           NC_STRING and anything above it.
 * src     - address of the value; it is read through a pointer of the
 *           C type that corresponds to srctype.
 * strp    - if non-NULL, receives a strdup'd copy of the text.
 *
 * Returns NC_NOERR, or NC_EINTERNAL when srctype is not one of the
 * handled numeric types.
 *
 * NOTE(review): "%lg" prints with printf's default precision of six
 * significant digits, so floating point values may not round-trip
 * exactly -- confirm that this is intended.
 */
int
NCZ_stringconvert1(nc_type srctype, unsigned char* src, char** strp)
{
    int stat = NC_NOERR;
    struct ZCVT wide;
    nc_type widetype = NC_NAT;
    char text[1024];

    assert(srctype >= NC_NAT && srctype != NC_CHAR && srctype < NC_STRING);

    /* Pass 1: widen the value into one of the three intermediate forms */
    switch (srctype) {
    case NC_BYTE:
        wide.int64v = *(signed char*)src;
        widetype = NC_INT64;
        break;
    case NC_SHORT:
        wide.int64v = *(signed short*)src;
        widetype = NC_INT64;
        break;
    case NC_INT:
        wide.int64v = *(signed int*)src;
        widetype = NC_INT64;
        break;
    case NC_INT64:
        wide.int64v = *(signed long long*)src;
        widetype = NC_INT64;
        break;
    case NC_UBYTE:
        wide.uint64v = *(unsigned char*)src;
        widetype = NC_UINT64;
        break;
    case NC_USHORT:
        wide.uint64v = *(unsigned short*)src;
        widetype = NC_UINT64;
        break;
    case NC_UINT:
        wide.uint64v = *(unsigned int*)src;
        widetype = NC_UINT64;
        break;
    case NC_UINT64:
        wide.uint64v = *(unsigned long long*)src;
        widetype = NC_UINT64;
        break;
    case NC_FLOAT:
        wide.float64v = *(float*)src;
        widetype = NC_DOUBLE;
        break;
    case NC_DOUBLE:
        wide.float64v = *(double*)src;
        widetype = NC_DOUBLE;
        break;
    default:
        stat = NC_EINTERNAL;
        goto done;
    }

    /* Pass 2: format the widened value */
    if(widetype == NC_INT64) {
        snprintf(text,sizeof(text),"%lld",wide.int64v);
    } else if(widetype == NC_UINT64) {
        snprintf(text,sizeof(text),"%llu",wide.uint64v);
    } else if(widetype == NC_DOUBLE) {
        snprintf(text,sizeof(text),"%lg",wide.float64v);
    } else {
        stat = NC_EINTERNAL;
        goto done;
    }

    if(strp != NULL)
        *strp = strdup(text);

done:
    return stat;
}
/*
 * Convert a vector of atomic values into a JSON value.
 *
 * typeid - atomic type of the elements; its size is obtained from
 *          NC4_inq_atomic_type.
 * len    - number of elements at data0 (for NC_CHAR, the number of
 *          characters handed to NCJnewstringn).
 * data0  - the values, stored contiguously.
 * jdatap - if non-NULL, receives the result and ownership of it; if
 *          NULL the result is reclaimed before returning.
 *
 * NC_CHAR data yields one NCJ_STRING node. For every other type a
 * single element yields a bare scalar node, while two or more yield
 * an NCJ_ARRAY of scalar nodes. NaN and the infinities are written as
 * the strings "NaN", "Infinity" and "-Infinity".
 *
 * Returns NC_NOERR on success, NC_EINVAL when len is 0 for a non-char
 * type, NC_EINTERNAL for an unsupported type, or whatever error a
 * called routine reports.
 */
int
NCZ_stringconvert(nc_type typeid, size_t len, void* data0, NCjson** jdatap)
{
    int stat = NC_NOERR;
    size_t i; /* same type as len so the loop comparison is exact */
    char* src = data0; /* so we can do arithmetic on it */
    size_t typelen;
    char* str = NULL;
    NCjson* jvalue = NULL;
    NCjson* jdata = NULL;

    if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen)))
        goto done;

    /* Handle char type specially */
    if(typeid == NC_CHAR) {
        /* Create a string valued json object */
        if((stat = NCJnewstringn(NCJ_STRING,len,src,&jdata)))
            goto done;
    } else { /* all other cases */
        if(len == 0) {stat = NC_EINVAL; goto done;}
        if(len > 1) {
            if((stat = NCJnew(NCJ_ARRAY,&jdata))) goto done;
        } else /* return a singleton */
            jdata = NULL;
        for(i=0;i<len;i++) {
            char* special = NULL;
            double d;
            if((stat = NCZ_stringconvert1(typeid, src, &str)))
                goto done;
            switch (typeid) {
            case NC_BYTE: case NC_SHORT: case NC_INT: case NC_INT64:
            case NC_UBYTE: case NC_USHORT: case NC_UINT: case NC_UINT64:
                if((stat=NCJnew(NCJ_INT,&jvalue))) goto done;
                break;
            case NC_FLOAT:
            case NC_DOUBLE: {
                if(typeid == NC_FLOAT)
                    d = (double)(*((float*)src));
                else
                    d = *((double*)src);
                /* Non-finite values get a fixed spelling on every platform */
#ifdef _WIN32
                switch (_fpclass(d)) {
                case _FPCLASS_SNAN: case _FPCLASS_QNAN:
                    special = "NaN"; break;
                case _FPCLASS_NINF:
                    special = "-Infinity"; break;
                case _FPCLASS_PINF:
                    special = "Infinity"; break;
                default: break;
                }
#else
                if(isnan(d))
                    special = "NaN";
                else if(isinf(d) && d < 0)
                    special = "-Infinity";
                else if(isinf(d) && d > 0)
                    special = "Infinity";
                else {}
#endif
                if((stat=NCJnew(NCJ_DOUBLE,&jvalue))) goto done;
            } break;
            case NC_CHAR: /* not reachable here: NC_CHAR is handled above */
                if((stat=NCJnew(NCJ_STRING,&jvalue))) goto done;
                break;
            default: stat = NC_EINTERNAL; goto done;
            }
            /* Replace the printed text when the value is non-finite */
            if(special) {nullfree(str); str = strdup(special);}
            NCJstring(jvalue) = str; /* jvalue now owns the text */
            str = NULL;
            if(len == 1)
                jdata = jvalue;
            else if((stat = NCJappend(jdata,jvalue)))
                goto done; /* jvalue is still ours and is reclaimed below */
            jvalue = NULL;
            src += typelen;
        }
    }
    if(jdatap) {*jdatap = jdata; jdata = NULL;}

done:
    nullfree(str);
    NCJreclaim(jvalue);
    NCJreclaim(jdata);
    return stat;
}