mirror of
https://github.com/Unidata/netcdf-c.git
synced 2025-01-06 15:34:44 +08:00
231ae96c4b
* re: https://github.com/Unidata/netcdf-c/pull/2278 * re: https://github.com/Unidata/netcdf-c/issues/2485 * re: https://github.com/Unidata/netcdf-c/issues/2474 This PR subsumes PR https://github.com/Unidata/netcdf-c/pull/2278. Actually, it is a bit of an omnibus covering several issues. ## PR https://github.com/Unidata/netcdf-c/pull/2278 Add support for the Zarr string type. Zarr strings are currently restricted to be of fixed size. The primary issue to be addressed is to provide a way for the user to specify the size of the fixed length strings. This is handled by providing the following new special attributes: 1. **_nczarr_default_maxstrlen** — This is an attribute of the root group. It specifies the default maximum string length for string types. If not specified, then it has the value of 64 characters. 2. **_nczarr_maxstrlen** — This is a per-variable attribute. It specifies the maximum string length for the string type associated with the variable. If not specified, then it is assigned the value of **_nczarr_default_maxstrlen**. This PR also requires some hacking to handle the existing netcdf-c NC_CHAR type, which does not exist in zarr. The goal was to choose numpy types for both the netcdf-c NC_STRING type and the netcdf-c NC_CHAR type such that if a pure zarr implementation read them, it would still work and an NC_CHAR type would be handled by zarr as a string of length 1. For writing variables and NCZarr attributes, the type mapping is as follows: * "|S1" for NC_CHAR. * ">S1" for NC_STRING && MAXSTRLEN==1 * ">Sn" for NC_STRING && MAXSTRLEN==n Note that it is a bit of a hack to use endianness, but it should be ok since for string/char, the endianness has no meaning. For reading attributes with pure zarr (i.e. with no nczarr attribute types defined), they will always be interpreted as of type NC_CHAR. 
## Issue: https://github.com/Unidata/netcdf-c/issues/2474 This PR partly fixes this issue because it provides more comprehensive support for Zarr attributes that are JSON valued expressions. This PR still does not address the problem in that issue where the _ARRAY_DIMENSION attribute is incorrectly set. That can only be fixed by the creator of the datasets. ## Issue: https://github.com/Unidata/netcdf-c/issues/2485 This PR also fixes the scalar failure shown in this issue. It generally cleans up scalar handling. It also adds a note to the documentation describing that NCZarr supports scalars while Zarr does not and also how scalar interoperability is achieved. ## Misc. Other Changes 1. Convert the nczarr special attributes and keys to be all lower case. So "_NCZARR_ATTR" is now "_nczarr_attr". Support backward compatibility for the upper case names. 2. Cleanup my too-clever-by-half handling of scalars in libnczarr.
1513 lines
40 KiB
C
1513 lines
40 KiB
C
/**
|
|
* @file
|
|
*
|
|
* Infer as much as possible from the omode + path.
|
|
* Rewrite the path to a canonical form.
|
|
*
|
|
* Copyright 2018 University Corporation for Atmospheric
|
|
* Research/Unidata. See COPYRIGHT file for more info.
|
|
*/
|
|
|
|
#include "config.h"
|
|
#include <stdlib.h>
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
#ifdef HAVE_SYS_TYPES_H
|
|
#include <sys/types.h>
|
|
#endif
|
|
|
|
#include "ncdispatch.h"
|
|
#include "ncpathmgr.h"
|
|
#include "netcdf_mem.h"
|
|
#include "fbits.h"
|
|
#include "ncbytes.h"
|
|
#include "nclist.h"
|
|
#include "nclog.h"
|
|
#include "ncrc.h"
|
|
#ifdef ENABLE_BYTERANGE
|
|
#include "nchttp.h"
|
|
#ifdef ENABLE_S3_SDK
|
|
#include "ncs3sdk.h"
|
|
#endif
|
|
#endif
|
|
|
|
#ifndef nulldup
|
|
#define nulldup(x) ((x)?strdup(x):(x))
|
|
#endif
|
|
|
|
#undef DEBUG
|
|
|
|
/* If Defined, then use only stdio for all magic number io;
|
|
otherwise use stdio or mpio as required.
|
|
*/
|
|
#undef USE_STDIO
|
|
|
|
/**
|
|
Sort info for open/read/close of
|
|
file when searching for magic numbers
|
|
*/
|
|
struct MagicFile {
|
|
const char* path;
|
|
struct NCURI* uri;
|
|
int omode;
|
|
NCmodel* model;
|
|
long long filelen;
|
|
int use_parallel;
|
|
int iss3;
|
|
void* parameters; /* !NULL if inmemory && !diskless */
|
|
FILE* fp;
|
|
#ifdef USE_PARALLEL
|
|
MPI_File fh;
|
|
#endif
|
|
#ifdef ENABLE_BYTERANGE
|
|
char* curlurl; /* url to use with CURLOPT_SET_URL */
|
|
NC_HTTP_STATE* state;
|
|
#ifdef ENABLE_S3_SDK
|
|
NCS3INFO s3;
|
|
void* s3client;
|
|
char* errmsg;
|
|
#endif
|
|
#endif
|
|
};
|
|
|
|
/** @internal Magic number for HDF5 files. To be consistent with
|
|
* H5Fis_hdf5, use the complete HDF5 magic number */
|
|
static char HDF5_SIGNATURE[MAGIC_NUMBER_LEN] = "\211HDF\r\n\032\n";
|
|
|
|
#define modelcomplete(model) ((model)->impl != 0)
|
|
|
|
#ifdef DEBUG
/* Flush both standard output streams. */
static void
dbgflush(void)
{
    fflush(stdout);
    fflush(stderr);
}

/* Intentionally does nothing; presumably a convenient place
   to set a debugger breakpoint on errors. */
static void
fail(int err)
{
    (void)err;
}

/* Pass err through unchanged, routing any error through fail(). */
static int
check(int err)
{
    if(err == NC_NOERR)
        return err;
    fail(err);
    return err;
}
#else /* !DEBUG */
#define check(err) (err)
#endif /* DEBUG */
|
|
|
|
/*
|
|
Define a table of "mode=" string values
|
|
from which the implementation can be inferred.
|
|
Note that only cases that can currently
|
|
take URLs are included.
|
|
*/
|
|
static struct FORMATMODES {
|
|
const char* tag;
|
|
const int impl; /* NC_FORMATX_XXX value */
|
|
const int format; /* NC_FORMAT_XXX value */
|
|
} formatmodes[] = {
|
|
{"dap2",NC_FORMATX_DAP2,NC_FORMAT_CLASSIC},
|
|
{"dap4",NC_FORMATX_DAP4,NC_FORMAT_NETCDF4},
|
|
{"netcdf-3",NC_FORMATX_NC3,0}, /* Might be e.g. cdf5 */
|
|
{"classic",NC_FORMATX_NC3,0}, /* ditto */
|
|
{"netcdf-4",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
|
|
{"enhanced",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
|
|
{"udf0",NC_FORMATX_UDF0,NC_FORMAT_NETCDF4},
|
|
{"udf1",NC_FORMATX_UDF1,NC_FORMAT_NETCDF4},
|
|
{"nczarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
|
|
{"zarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
|
|
{"bytes",NC_FORMATX_NC4,NC_FORMAT_NETCDF4}, /* temporary until 3 vs 4 is determined */
|
|
{NULL,0},
|
|
};
|
|
|
|
/* Replace top-level name with defkey=defvalue */
|
|
/* A singleton fragment "name" stands for defkey=defvalues */
struct MACRODEF {
    char* name;
    char* defkey;
    char* defvalues[4]; /* NULL terminated */
};

static const struct MACRODEF macrodefs[] = {
    {"zarr",     "mode", {"nczarr", "zarr", NULL}},
    {"dap2",     "mode", {"dap2", NULL}},
    {"dap4",     "mode", {"dap4", NULL}},
    {"s3",       "mode", {"s3", "nczarr", NULL}},
    {"bytes",    "mode", {"bytes", NULL}},
    {"xarray",   "mode", {"zarr", NULL}},
    {"noxarray", "mode", {"nczarr", "noxarray", NULL}},
    {"zarr",     "mode", {"nczarr", "zarr", NULL}}, /* repeats the first row */
    {NULL, NULL, {NULL}}
};
|
|
|
|
/* Mode inferences: if mode contains key, then add the inference and infer again */
|
|
/* A (key,inference) rule; also reused for the negation table */
struct MODEINFER {
    char* key;
    char* inference;
};

/* If the mode list contains key, then inference is added to it */
static const struct MODEINFER modeinferences[] = {
    {"zarr",     "nczarr"},
    {"xarray",   "zarr"},
    {"noxarray", "nczarr"},
    {NULL, NULL}
};
|
|
|
|
/* Mode negations: if mode contains key, then remove all occurrences of the inference and repeat */
|
|
static const struct MODEINFER modenegations[] = {
    /* bytes negates (nc)zarr */
    {"bytes",    "nczarr"},
    {"bytes",    "zarr"},
    {"noxarray", "xarray"},
    {NULL, NULL}
};
|
|
|
|
/* Map FORMATX to readability to get magic number */
|
|
static struct Readable {
|
|
int impl;
|
|
int readable;
|
|
} readable[] = {
|
|
{NC_FORMATX_NC3,1},
|
|
{NC_FORMATX_NC_HDF5,1},
|
|
{NC_FORMATX_NC_HDF4,1},
|
|
{NC_FORMATX_PNETCDF,1},
|
|
{NC_FORMATX_DAP2,0},
|
|
{NC_FORMATX_DAP4,0},
|
|
{NC_FORMATX_UDF0,0},
|
|
{NC_FORMATX_UDF1,0},
|
|
{NC_FORMATX_NCZARR,0}, /* eventually make readable */
|
|
{0,0},
|
|
};
|
|
|
|
/* Define the known URL protocols and their interpretation */
|
|
/* protocol: as written in the URL;
   substitute: protocol to replace it with (NULL => keep);
   fragments: fragment arguments implied by the protocol (NULL => none) */
struct NCPROTOCOLLIST {
    const char* protocol;
    const char* substitute;
    const char* fragments; /* arbitrary fragment arguments */
};

static struct NCPROTOCOLLIST ncprotolist[] = {
    {"http",  NULL,   NULL},
    {"https", NULL,   NULL},
    {"file",  NULL,   NULL},
    {"dods",  "http", "mode=dap2"},
    {"dap4",  "http", "mode=dap4"},
    {"s3",    "s3",   "mode=s3"},
    {NULL, NULL, NULL} /* Terminate search */
};
|
|
|
|
/* Forward */
|
|
static int NC_omodeinfer(int useparallel, int omode, NCmodel*);
|
|
static int check_file_type(const char *path, int omode, int use_parallel, void *parameters, NCmodel* model, NCURI* uri);
|
|
static int processuri(const char* path, NCURI** urip, NClist* fraglist);
|
|
static int processmacros(NClist** fraglistp);
|
|
static char* envvlist2string(NClist* pairs, const char*);
|
|
static void set_default_mode(int* cmodep);
|
|
static int parseonchar(const char* s, int ch, NClist* segments);
|
|
|
|
static int openmagic(struct MagicFile* file);
|
|
static int readmagic(struct MagicFile* file, long pos, char* magic);
|
|
static int closemagic(struct MagicFile* file);
|
|
static int NC_interpret_magic_number(char* magic, NCmodel* model);
|
|
#ifdef DEBUG
|
|
static void printmagic(const char* tag, char* magic,struct MagicFile*);
|
|
static void printlist(NClist* list, const char* tag);
|
|
#endif
|
|
static int isreadable(NCURI*,NCmodel*);
|
|
static char* list2string(NClist*);
|
|
static int parsepair(const char* pair, char** keyp, char** valuep);
|
|
static NClist* parsemode(const char* modeval);
|
|
static const char* getmodekey(const NClist* envv);
|
|
static int replacemode(NClist* envv, const char* newval);
|
|
static int inferone(const char* mode, NClist* newmodes);
|
|
static int negateone(const char* mode, NClist* modes);
|
|
|
|
/*
|
|
If the path looks like a URL, then parse it, reformat it.
|
|
*/
|
|
static int
|
|
processuri(const char* path, NCURI** urip, NClist* fraglenv)
|
|
{
|
|
int stat = NC_NOERR;
|
|
int found = 0;
|
|
NClist* tmp = NULL;
|
|
struct NCPROTOCOLLIST* protolist;
|
|
NCURI* uri = NULL;
|
|
size_t pathlen = strlen(path);
|
|
char* str = NULL;
|
|
const char** ufrags;
|
|
const char** p;
|
|
|
|
if(path == NULL || pathlen == 0) {stat = NC_EURL; goto done;}
|
|
|
|
/* Defaults */
|
|
if(urip) *urip = NULL;
|
|
|
|
ncuriparse(path,&uri);
|
|
if(uri == NULL) goto done; /* not url */
|
|
|
|
/* Look up the protocol */
|
|
for(found=0,protolist=ncprotolist;protolist->protocol;protolist++) {
|
|
if(strcmp(uri->protocol,protolist->protocol) == 0) {
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(!found)
|
|
{stat = NC_EINVAL; goto done;} /* unrecognized URL form */
|
|
|
|
/* process the corresponding fragments for that protocol */
|
|
if(protolist->fragments != NULL) {
|
|
int i;
|
|
tmp = nclistnew();
|
|
if((stat = parseonchar(protolist->fragments,'&',tmp))) goto done;
|
|
for(i=0;i<nclistlength(tmp);i++) {
|
|
char* key=NULL;
|
|
char* value=NULL;
|
|
if((stat = parsepair(nclistget(tmp,i),&key,&value))) goto done;
|
|
if(value == NULL) value = strdup("");
|
|
nclistpush(fraglenv,key);
|
|
nclistpush(fraglenv,value);
|
|
}
|
|
nclistfreeall(tmp); tmp = NULL;
|
|
}
|
|
|
|
/* Substitute the protocol in any case */
|
|
if(protolist->substitute) ncurisetprotocol(uri,protolist->substitute);
|
|
|
|
/* capture the fragments of the url */
|
|
ufrags = ncurifragmentparams(uri);
|
|
if(ufrags != NULL) {
|
|
for(p=ufrags;*p;p+=2) {
|
|
const char* key = p[0];
|
|
const char* value = p[1];
|
|
nclistpush(fraglenv,nulldup(key));
|
|
value = (value==NULL?"":value);
|
|
nclistpush(fraglenv,strdup(value));
|
|
}
|
|
}
|
|
if(urip) {
|
|
*urip = uri;
|
|
uri = NULL;
|
|
}
|
|
|
|
done:
|
|
nclistfreeall(tmp);
|
|
nullfree(str);
|
|
if(uri != NULL) ncurifree(uri);
|
|
return check(stat);
|
|
}
|
|
|
|
/* Split a key=value pair */
|
|
static int
|
|
parsepair(const char* pair, char** keyp, char** valuep)
|
|
{
|
|
const char* p;
|
|
char* key = NULL;
|
|
char* value = NULL;
|
|
|
|
if(pair == NULL)
|
|
return NC_EINVAL; /* empty pair */
|
|
if(pair[0] == '\0' || pair[0] == '=')
|
|
return NC_EINVAL; /* no key */
|
|
p = strchr(pair,'=');
|
|
if(p == NULL) {
|
|
value = NULL;
|
|
key = strdup(pair);
|
|
} else {
|
|
ptrdiff_t len = (p-pair);
|
|
if((key = malloc(len+1))==NULL) return NC_ENOMEM;
|
|
memcpy(key,pair,len);
|
|
key[len] = '\0';
|
|
if(p[1] == '\0')
|
|
value = NULL;
|
|
else
|
|
value = strdup(p+1);
|
|
}
|
|
if(keyp) {*keyp = key; key = NULL;};
|
|
if(valuep) {*valuep = value; value = NULL;};
|
|
nullfree(key);
|
|
nullfree(value);
|
|
return NC_NOERR;
|
|
}
|
|
|
|
#if 0
/* Disabled: split modestr at each comma, pushing a copy of every piece onto list */
static int
parseurlmode(const char* modestr, NClist* list)
{
    int stat = NC_NOERR;
    const char* start;

    if(modestr == NULL || modestr[0] == '\0')
        return check(stat);

    start = modestr;
    while(1) {
        const char* stop = strchr(start,',');
        ptrdiff_t piecelen;
        char* piece;
        if(stop == NULL) stop = start + strlen(start);
        piecelen = (stop - start);
        piece = malloc(piecelen+1);
        if(piece == NULL) {stat = NC_ENOMEM; break;}
        memcpy(piece,start,piecelen);
        piece[piecelen] = '\0';
        nclistpush(list,piece);
        if(*stop == '\0') break;
        start = stop+1;
    }
    return check(stat);
}
#endif
|
|
|
|
/* Split a string at a given char */
|
|
static int
|
|
parseonchar(const char* s, int ch, NClist* segments)
|
|
{
|
|
int stat = NC_NOERR;
|
|
const char* p = NULL;
|
|
const char* endp = NULL;
|
|
|
|
if(s == NULL || *s == '\0') goto done;
|
|
|
|
p = s;
|
|
for(;;) {
|
|
char* q;
|
|
ptrdiff_t slen;
|
|
endp = strchr(p,ch);
|
|
if(endp == NULL) endp = p + strlen(p);
|
|
slen = (endp - p);
|
|
if((q = malloc(slen+1)) == NULL) {stat = NC_ENOMEM; goto done;}
|
|
memcpy(q,p,slen);
|
|
q[slen] = '\0';
|
|
nclistpush(segments,q);
|
|
if(*endp == '\0') break;
|
|
p = endp+1;
|
|
}
|
|
|
|
done:
|
|
return check(stat);
|
|
}
|
|
|
|
/* Convert a key,value envv pairlist into a delimited string*/
|
|
static char*
|
|
envvlist2string(NClist* envv, const char* delim)
|
|
{
|
|
int i;
|
|
NCbytes* buf = NULL;
|
|
char* result = NULL;
|
|
|
|
if(envv == NULL || nclistlength(envv) == 0) return NULL;
|
|
buf = ncbytesnew();
|
|
for(i=0;i<nclistlength(envv);i+=2) {
|
|
const char* key = nclistget(envv,i);
|
|
const char* val = nclistget(envv,i+1);
|
|
if(key == NULL || strlen(key) == 0) continue;
|
|
assert(val != NULL);
|
|
if(i > 0) ncbytescat(buf,"&");
|
|
ncbytescat(buf,key);
|
|
if(val != NULL && val[0] != '\0') {
|
|
ncbytescat(buf,"=");
|
|
ncbytescat(buf,val);
|
|
}
|
|
}
|
|
result = ncbytesextract(buf);
|
|
ncbytesfree(buf);
|
|
return result;
|
|
}
|
|
|
|
/* Convert a list into a comma'd string */
|
|
static char*
|
|
list2string(NClist* list)
|
|
{
|
|
int i;
|
|
NCbytes* buf = NULL;
|
|
char* result = NULL;
|
|
|
|
if(list == NULL || nclistlength(list)==0) return strdup("");
|
|
buf = ncbytesnew();
|
|
for(i=0;i<nclistlength(list);i++) {
|
|
const char* m = nclistget(list,i);
|
|
if(m == NULL || strlen(m) == 0) continue;
|
|
if(i > 0) ncbytescat(buf,",");
|
|
ncbytescat(buf,m);
|
|
}
|
|
result = ncbytesextract(buf);
|
|
ncbytesfree(buf);
|
|
if(result == NULL) result = strdup("");
|
|
return result;
|
|
}
|
|
|
|
/* Given a mode= argument, fill in the impl */
|
|
static int
|
|
processmodearg(const char* arg, NCmodel* model)
|
|
{
|
|
int stat = NC_NOERR;
|
|
struct FORMATMODES* format = formatmodes;
|
|
for(;format->tag;format++) {
|
|
if(strcmp(format->tag,arg)==0) {
|
|
model->impl = format->impl;
|
|
if(format->format != 0) model->format = format->format;
|
|
}
|
|
}
|
|
return check(stat);
|
|
}
|
|
|
|
/* Given an envv fragment list, do macro replacement */
|
|
static int
|
|
processmacros(NClist** fraglenvp)
|
|
{
|
|
int stat = NC_NOERR;
|
|
const struct MACRODEF* macros = NULL;
|
|
NClist* fraglenv = NULL;
|
|
NClist* expanded = NULL;
|
|
|
|
if(fraglenvp == NULL || nclistlength(*fraglenvp) == 0) goto done;
|
|
fraglenv = *fraglenvp;
|
|
expanded = nclistnew();
|
|
while(nclistlength(fraglenv) > 0) {
|
|
int found = 0;
|
|
char* key = NULL;
|
|
char* value = NULL;
|
|
key = nclistremove(fraglenv,0); /* remove from changing front */
|
|
value = nclistremove(fraglenv,0); /* remove from changing front */
|
|
if(strlen(value) == 0) { /* must be a singleton */
|
|
for(macros=macrodefs;macros->name;macros++) {
|
|
if(strcmp(macros->name,key)==0) {
|
|
char* const * p;
|
|
nclistpush(expanded,strdup(macros->defkey));
|
|
for(p=macros->defvalues;*p;p++)
|
|
nclistpush(expanded,strdup(*p));
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if(!found) {/* pass thru */
|
|
nclistpush(expanded,strdup(key));
|
|
nclistpush(expanded,strdup(value));
|
|
}
|
|
nullfree(key);
|
|
nullfree(value);
|
|
}
|
|
*fraglenvp = expanded; expanded = NULL;
|
|
|
|
done:
|
|
nclistfreeall(expanded);
|
|
nclistfreeall(fraglenv);
|
|
return check(stat);
|
|
}
|
|
|
|
/* Process mode flag inferences */
|
|
static int
|
|
processinferences(NClist* fraglenv)
|
|
{
|
|
int stat = NC_NOERR;
|
|
const char* modeval = NULL;
|
|
NClist* modes = NULL;
|
|
NClist* newmodes = nclistnew();
|
|
int i,inferred = 0;
|
|
char* newmodeval = NULL;
|
|
|
|
if(fraglenv == NULL || nclistlength(fraglenv) == 0) goto done;
|
|
|
|
/* Get "mode" entry */
|
|
if((modeval = getmodekey(fraglenv))==NULL) goto done;
|
|
|
|
/* Get the mode as list */
|
|
modes = parsemode(modeval);
|
|
|
|
/* Repeatedly walk the mode list until no more new positive inferences */
|
|
do {
|
|
for(i=0;i<nclistlength(modes);i++) {
|
|
const char* mode = nclistget(modes,i);
|
|
inferred = inferone(mode,newmodes);
|
|
nclistpush(newmodes,strdup(mode)); /* keep key */
|
|
if(!inferred) nclistpush(newmodes,strdup(mode));
|
|
}
|
|
} while(inferred);
|
|
|
|
/* Remove negative inferences */
|
|
for(i=0;i<nclistlength(modes);i++) {
|
|
const char* mode = nclistget(modes,i);
|
|
inferred = negateone(mode,newmodes);
|
|
}
|
|
|
|
/* Store new mode value */
|
|
if((newmodeval = list2string(newmodes))== NULL)
|
|
{stat = NC_ENOMEM; goto done;}
|
|
if((stat=replacemode(fraglenv,newmodeval))) goto done;
|
|
modeval = NULL;
|
|
|
|
done:
|
|
nullfree(newmodeval);
|
|
nclistfreeall(modes);
|
|
nclistfreeall(newmodes);
|
|
return check(stat);
|
|
}
|
|
|
|
static int
|
|
negateone(const char* mode, NClist* newmodes)
|
|
{
|
|
const struct MODEINFER* tests = modenegations;
|
|
int changed = 0;
|
|
for(;tests->key;tests++) {
|
|
int i;
|
|
if(strcasecmp(tests->key,mode)==0) {
|
|
/* Find and remove all instances of the inference value */
|
|
for(i=nclistlength(newmodes)-1;i>=0;i--) {
|
|
char* candidate = nclistget(newmodes,i);
|
|
if(strcasecmp(candidate,tests->inference)==0) {
|
|
nclistremove(newmodes,i);
|
|
nullfree(candidate);
|
|
changed = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return changed;
|
|
}
|
|
|
|
static int
|
|
inferone(const char* mode, NClist* newmodes)
|
|
{
|
|
const struct MODEINFER* tests = modeinferences;
|
|
int changed = 0;
|
|
for(;tests->key;tests++) {
|
|
if(strcasecmp(tests->key,mode)==0) {
|
|
/* Append the inferred mode; dups removed later */
|
|
nclistpush(newmodes,strdup(tests->inference));
|
|
changed = 1;
|
|
}
|
|
}
|
|
return changed;
|
|
}
|
|
|
|
static int
|
|
mergekey(NClist** valuesp)
|
|
{
|
|
int i,j;
|
|
int stat = NC_NOERR;
|
|
NClist* values = *valuesp;
|
|
NClist* allvalues = nclistnew();
|
|
NClist* newvalues = nclistnew();
|
|
char* value = NULL;
|
|
|
|
for(i=0;i<nclistlength(values);i++) {
|
|
char* val1 = nclistget(values,i);
|
|
/* split on commas and put pieces into allvalues */
|
|
if((stat=parseonchar(val1,',',allvalues))) goto done;
|
|
}
|
|
/* Remove duplicates and "" */
|
|
while(nclistlength(allvalues) > 0) {
|
|
value = nclistremove(allvalues,0);
|
|
if(strlen(value) == 0) {
|
|
nullfree(value); value = NULL;
|
|
} else {
|
|
for(j=0;j<nclistlength(newvalues);j++) {
|
|
char* candidate = nclistget(newvalues,j);
|
|
if(strcasecmp(candidate,value)==0)
|
|
{nullfree(value); value = NULL; break;}
|
|
}
|
|
}
|
|
if(value != NULL) {nclistpush(newvalues,value); value = NULL;}
|
|
}
|
|
/* Make sure to have at least 1 value */
|
|
if(nclistlength(newvalues)==0) nclistpush(newvalues,strdup(""));
|
|
*valuesp = values; values = NULL;
|
|
|
|
done:
|
|
nclistfree(allvalues);
|
|
nclistfreeall(values);
|
|
nclistfreeall(newvalues);
|
|
return check(stat);
|
|
}
|
|
|
|
static int
|
|
lcontains(NClist* l, const char* key0)
|
|
{
|
|
int i;
|
|
for(i=0;i<nclistlength(l);i++) {
|
|
const char* key1 = nclistget(l,i);
|
|
if(strcasecmp(key0,key1)==0) return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Warning values should not use nclistfreeall */
|
|
static void
|
|
collectvaluesbykey(NClist* fraglenv, const char* key, NClist* values)
|
|
{
|
|
int i;
|
|
/* collect all the values with the same key (including this one) */
|
|
for(i=0;i<nclistlength(fraglenv);i+=2) {
|
|
const char* key2 = nclistget(fraglenv,i);
|
|
if(strcasecmp(key,key2)==0) {
|
|
const char* value2 = nclistget(fraglenv,i+1);
|
|
nclistpush(values,value2); value2 = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Warning allkeys should not use nclistfreeall */
|
|
static void
|
|
collectallkeys(NClist* fraglenv, NClist* allkeys)
|
|
{
|
|
int i;
|
|
/* collect all the distinct keys */
|
|
for(i=0;i<nclistlength(fraglenv);i+=2) {
|
|
char* key = nclistget(fraglenv,i);
|
|
if(!lcontains(allkeys,key)) {
|
|
nclistpush(allkeys,key);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Given a fragment envv list, coalesce duplicate keys and remove duplicate values*/
|
|
static int
|
|
cleanfragments(NClist** fraglenvp)
|
|
{
|
|
int i,stat = NC_NOERR;
|
|
NClist* fraglenv = NULL;
|
|
NClist* tmp = NULL;
|
|
NClist* allkeys = NULL;
|
|
NClist* newlist = NULL;
|
|
NCbytes* buf = NULL;
|
|
char* key = NULL;
|
|
char* value = NULL;
|
|
|
|
if(fraglenvp == NULL || nclistlength(*fraglenvp) == 0) return NC_NOERR;
|
|
fraglenv = *fraglenvp; /* take control of this list */
|
|
*fraglenvp = NULL;
|
|
newlist = nclistnew();
|
|
buf = ncbytesnew();
|
|
allkeys = nclistnew();
|
|
tmp = nclistnew();
|
|
|
|
/* collect all unique keys */
|
|
collectallkeys(fraglenv,allkeys);
|
|
/* Collect all values for same key across all fragments */
|
|
for(i=0;i<nclistlength(allkeys);i++) {
|
|
key = nclistget(allkeys,i);
|
|
collectvaluesbykey(fraglenv,key,tmp);
|
|
/* merge the key values, remove duplicate */
|
|
if((stat=mergekey(&tmp))) goto done;
|
|
/* Construct key,value pair and insert into newlist */
|
|
key = strdup(key);
|
|
nclistpush(newlist,key);
|
|
value = list2string(tmp);
|
|
nclistpush(newlist,value);
|
|
nclistclear(tmp);
|
|
}
|
|
*fraglenvp = newlist; newlist = NULL;
|
|
done:
|
|
nclistfree(allkeys);
|
|
nclistfree(tmp);
|
|
ncbytesfree(buf);
|
|
nclistfreeall(fraglenv);
|
|
nclistfreeall(newlist);
|
|
return check(stat);
|
|
}
|
|
|
|
/* process non-mode fragment keys in case they hold significance; currently not */
|
|
static int
|
|
processfragmentkeys(const char* key, const char* value, NCmodel* model)
|
|
{
|
|
return NC_NOERR;
|
|
}
|
|
|
|
/*
|
|
Infer from the mode + useparallel
|
|
only call if iscreate or file is not easily readable.
|
|
*/
|
|
static int
|
|
NC_omodeinfer(int useparallel, int cmode, NCmodel* model)
|
|
{
|
|
int stat = NC_NOERR;
|
|
|
|
/* If no format flags are set, then use default */
|
|
if(!fIsSet(cmode,NC_FORMAT_ALL))
|
|
set_default_mode(&cmode);
|
|
|
|
/* Process the cmode; may override some already set flags. The
|
|
* user-defined formats must be checked first. They may choose to
|
|
* use some of the other flags, like NC_NETCDF4, so we must first
|
|
* check NC_UDF0 and NC_UDF1 before checking for any other
|
|
* flag. */
|
|
if(fIsSet(cmode,(NC_UDF0|NC_UDF1))) {
|
|
model->format = NC_FORMAT_NETCDF4;
|
|
if(fIsSet(cmode,NC_UDF0)) {
|
|
model->impl = NC_FORMATX_UDF0;
|
|
} else {
|
|
model->impl = NC_FORMATX_UDF1;
|
|
}
|
|
goto done;
|
|
}
|
|
|
|
if(fIsSet(cmode,NC_64BIT_OFFSET)) {
|
|
model->impl = NC_FORMATX_NC3;
|
|
model->format = NC_FORMAT_64BIT_OFFSET;
|
|
goto done;
|
|
}
|
|
|
|
if(fIsSet(cmode,NC_64BIT_DATA)) {
|
|
model->impl = NC_FORMATX_NC3;
|
|
model->format = NC_FORMAT_64BIT_DATA;
|
|
goto done;
|
|
}
|
|
|
|
if(fIsSet(cmode,NC_NETCDF4)) {
|
|
model->impl = NC_FORMATX_NC4;
|
|
if(fIsSet(cmode,NC_CLASSIC_MODEL))
|
|
model->format = NC_FORMAT_NETCDF4_CLASSIC;
|
|
else
|
|
model->format = NC_FORMAT_NETCDF4;
|
|
goto done;
|
|
}
|
|
|
|
/* Default to classic model */
|
|
model->format = NC_FORMAT_CLASSIC;
|
|
model->impl = NC_FORMATX_NC3;
|
|
|
|
done:
|
|
/* Apply parallel flag */
|
|
if(useparallel) {
|
|
if(model->impl == NC_FORMATX_NC3)
|
|
model->impl = NC_FORMATX_PNETCDF;
|
|
}
|
|
return check(stat);
|
|
}
|
|
|
|
/*
|
|
If the mode flags do not necessarily specify the
|
|
format, then default it by adding in appropriate flags.
|
|
*/
|
|
|
|
static void
|
|
set_default_mode(int* modep)
|
|
{
|
|
int mode = *modep;
|
|
int dfaltformat;
|
|
|
|
dfaltformat = nc_get_default_format();
|
|
switch (dfaltformat) {
|
|
case NC_FORMAT_64BIT_OFFSET: mode |= NC_64BIT_OFFSET; break;
|
|
case NC_FORMAT_64BIT_DATA: mode |= NC_64BIT_DATA; break;
|
|
case NC_FORMAT_NETCDF4: mode |= NC_NETCDF4; break;
|
|
case NC_FORMAT_NETCDF4_CLASSIC: mode |= (NC_NETCDF4|NC_CLASSIC_MODEL); break;
|
|
case NC_FORMAT_CLASSIC: /* fall thru */
|
|
default: break; /* default to classic */
|
|
}
|
|
*modep = mode; /* final result */
|
|
}
|
|
|
|
/**************************************************/
|
|
/*
|
|
Infer model for this dataset using some
|
|
combination of cmode, path, and reading the dataset.
|
|
See the documentation in docs/internal.dox.
|
|
|
|
@param path
|
|
@param omode
|
|
@param iscreate
|
|
@param useparallel
|
|
@param params
|
|
@param model
|
|
@param newpathp
|
|
*/
|
|
|
|
int
|
|
NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void* params, NCmodel* model, char** newpathp)
|
|
{
|
|
int i,stat = NC_NOERR;
|
|
NCURI* uri = NULL;
|
|
int omode = *omodep;
|
|
NClist* fraglenv = nclistnew();
|
|
NClist* modeargs = nclistnew();
|
|
char* sfrag = NULL;
|
|
const char* modeval = NULL;
|
|
char* abspath = NULL;
|
|
|
|
/* Phase 1:
|
|
1. convert special protocols to http|https
|
|
2. begin collecting fragments
|
|
*/
|
|
if((stat = processuri(path, &uri, fraglenv))) goto done;
|
|
|
|
if(uri != NULL) {
|
|
#ifdef DEBUG
|
|
printlist(fraglenv,"processuri");
|
|
#endif
|
|
|
|
/* Phase 2: Expand macros and add to fraglenv */
|
|
if((stat = processmacros(&fraglenv))) goto done;
|
|
#ifdef DEBUG
|
|
printlist(fraglenv,"processmacros");
|
|
#endif
|
|
|
|
/* Cleanup the fragment list */
|
|
if((stat = cleanfragments(&fraglenv))) goto done;
|
|
|
|
/* Phase 2a: Expand mode inferences and add to fraglenv */
|
|
if((stat = processinferences(fraglenv))) goto done;
|
|
#ifdef DEBUG
|
|
printlist(fraglenv,"processinferences");
|
|
#endif
|
|
|
|
/* Phase 3: coalesce duplicate fragment keys and remove duplicate values */
|
|
if((stat = cleanfragments(&fraglenv))) goto done;
|
|
#ifdef DEBUG
|
|
printlist(fraglenv,"cleanfragments");
|
|
#endif
|
|
|
|
/* Phase 4: Rebuild the url fragment and rebuilt the url */
|
|
sfrag = envvlist2string(fraglenv,"&");
|
|
nclistfreeall(fraglenv); fraglenv = NULL;
|
|
#ifdef DEBUG
|
|
fprintf(stderr,"frag final: %s\n",sfrag);
|
|
#endif
|
|
ncurisetfragments(uri,sfrag);
|
|
nullfree(sfrag); sfrag = NULL;
|
|
|
|
/* If s3, then rebuild the url */
|
|
if(NC_iss3(uri)) {
|
|
NCURI* newuri = NULL;
|
|
if((stat = NC_s3urlrebuild(uri,&newuri,NULL,NULL))) goto done;
|
|
ncurifree(uri);
|
|
uri = newuri;
|
|
} else if(strcmp(uri->protocol,"file")==0) {
|
|
/* convert path to absolute */
|
|
char* canon = NULL;
|
|
abspath = NCpathabsolute(uri->path);
|
|
if((stat = NCpathcanonical(abspath,&canon))) goto done;
|
|
nullfree(abspath);
|
|
abspath = canon; canon = NULL;
|
|
if((stat = ncurisetpath(uri,abspath))) goto done;
|
|
}
|
|
|
|
/* rebuild the path */
|
|
if(newpathp) {
|
|
*newpathp = ncuribuild(uri,NULL,NULL,NCURIALL);
|
|
#ifdef DEBUG
|
|
fprintf(stderr,"newpath=|%s|\n",*newpathp); fflush(stderr);
|
|
#endif
|
|
}
|
|
|
|
/* Phase 5: Process the mode key to see if we can tell the formatx */
|
|
modeval = ncurifragmentlookup(uri,"mode");
|
|
if(modeval != NULL) {
|
|
if((stat = parseonchar(modeval,',',modeargs))) goto done;
|
|
for(i=0;i<nclistlength(modeargs);i++) {
|
|
const char* arg = nclistget(modeargs,i);
|
|
if((stat=processmodearg(arg,model))) goto done;
|
|
}
|
|
}
|
|
|
|
/* Phase 6: Process the non-mode keys to see if we can tell the formatx */
|
|
if(!modelcomplete(model)) {
|
|
const char** p = ncurifragmentparams(uri); /* envv format */
|
|
if(p != NULL) {
|
|
for(;*p;p+=2) {
|
|
const char* key = p[0];
|
|
const char* value = p[1];;
|
|
if((stat=processfragmentkeys(key,value,model))) goto done;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Phase 7: Special cases: if this is a URL and model.impl is still not defined */
|
|
/* Phase7a: Default is DAP2 */
|
|
if(!modelcomplete(model)) {
|
|
model->impl = NC_FORMATX_DAP2;
|
|
model->format = NC_FORMAT_NC3;
|
|
}
|
|
|
|
} else {/* Not URL */
|
|
if(*newpathp) *newpathp = NULL;
|
|
}
|
|
|
|
/* Phase 8: mode inference from mode flags */
|
|
/* The modeargs did not give us a model (probably not a URL).
|
|
So look at the combination of mode flags and the useparallel flag */
|
|
if(!modelcomplete(model)) {
|
|
if((stat = NC_omodeinfer(useparallel,omode,model))) goto done;
|
|
}
|
|
|
|
/* Phase 9: Infer from file content, if possible;
|
|
this has highest precedence, so it may override
|
|
previous decisions. Note that we do this last
|
|
because we need previously determined model info
|
|
to guess if this file is readable.
|
|
*/
|
|
if(!iscreate && isreadable(uri,model)) {
|
|
/* Ok, we need to try to read the file */
|
|
if((stat = check_file_type(path, omode, useparallel, params, model, uri))) goto done;
|
|
}
|
|
|
|
/* Need a decision */
|
|
if(!modelcomplete(model))
|
|
{stat = NC_ENOTNC; goto done;}
|
|
|
|
/* Force flag consistency */
|
|
switch (model->impl) {
|
|
case NC_FORMATX_NC4:
|
|
case NC_FORMATX_NC_HDF4:
|
|
case NC_FORMATX_DAP4:
|
|
case NC_FORMATX_UDF0:
|
|
case NC_FORMATX_UDF1:
|
|
case NC_FORMATX_NCZARR:
|
|
omode |= NC_NETCDF4;
|
|
if(model->format == NC_FORMAT_NETCDF4_CLASSIC)
|
|
omode |= NC_CLASSIC_MODEL;
|
|
break;
|
|
case NC_FORMATX_NC3:
|
|
omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
|
|
if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
|
|
else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
|
|
break;
|
|
case NC_FORMATX_PNETCDF:
|
|
omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
|
|
if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
|
|
else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
|
|
break;
|
|
case NC_FORMATX_DAP2:
|
|
omode &= ~(NC_NETCDF4|NC_64BIT_OFFSET|NC_64BIT_DATA|NC_CLASSIC_MODEL);
|
|
break;
|
|
default:
|
|
{stat = NC_ENOTNC; goto done;}
|
|
}
|
|
|
|
done:
|
|
nullfree(sfrag);
|
|
nullfree(abspath);
|
|
ncurifree(uri);
|
|
nclistfreeall(modeargs);
|
|
nclistfreeall(fraglenv);
|
|
*omodep = omode; /* in/out */
|
|
return check(stat);
|
|
}
|
|
|
|
static int
|
|
isreadable(NCURI* uri, NCmodel* model)
|
|
{
|
|
int canread = 0;
|
|
struct Readable* r;
|
|
/* Step 1: Look up the implementation */
|
|
for(r=readable;r->impl;r++) {
|
|
if(model->impl == r->impl) {canread = r->readable; break;}
|
|
}
|
|
/* Step 2: check for bytes mode */
|
|
if(!canread && NC_testmode(uri,"bytes") && (model->impl == NC_FORMATX_NC4 || model->impl == NC_FORMATX_NC_HDF5))
|
|
canread = 1;
|
|
return canread;
|
|
}
|
|
|
|
#if 0
/* Disabled: return a newly allocated copy of s, with NULL mapped
   to ""; caller frees. */
static char*
emptyify(char* s)
{
    /* Copy exactly once so that no intermediate allocation is leaked */
    return strdup(s == NULL ? "" : s);
}

/* Disabled: map an empty string to NULL; anything else is returned unchanged */
static const char*
nullify(const char* s)
{
    if(s != NULL && strlen(s) == 0)
        return NULL;
    return s;
}
#endif
|
|
|
|
/**************************************************/
|
|
/**************************************************/
|
|
/**
|
|
* Provide a hidden interface to allow utilities
|
|
* to check if a given path name is really a url.
|
|
* If not, put null in basenamep, else put basename of the url path
|
|
* minus any extension into basenamep; caller frees.
|
|
* Return 1 if it looks like a url, 0 otherwise.
|
|
*/
|
|
|
|
int
|
|
nc__testurl(const char* path0, char** basenamep)
|
|
{
|
|
NCURI* uri = NULL;
|
|
int ok = 0;
|
|
char* path = NULL;
|
|
|
|
if(!ncuriparse(path0,&uri)) {
|
|
char* p;
|
|
char* q;
|
|
path = strdup(uri->path);
|
|
if(path == NULL||strlen(path)==0) goto done;
|
|
p = strrchr(path, '/');
|
|
if(p == NULL) p = path; else p++;
|
|
q = strrchr(p,'.');
|
|
if(q != NULL) *q = '\0';
|
|
if(strlen(p) == 0) goto done;
|
|
if(basenamep)
|
|
*basenamep = strdup(p);
|
|
ok = 1;
|
|
}
|
|
done:
|
|
ncurifree(uri);
|
|
nullfree(path);
|
|
return ok;
|
|
}
|
|
|
|
/**************************************************/
|
|
/* Envv list utilities */
|
|
|
|
static const char*
|
|
getmodekey(const NClist* envv)
|
|
{
|
|
int i;
|
|
/* Get "mode" entry */
|
|
for(i=0;i<nclistlength(envv);i+=2) {
|
|
char* key = NULL;
|
|
key = nclistget(envv,i);
|
|
if(strcasecmp(key,"mode")==0)
|
|
return nclistget(envv,i+1);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static int
|
|
replacemode(NClist* envv, const char* newval)
|
|
{
|
|
int i;
|
|
/* Get "mode" entry */
|
|
for(i=0;i<nclistlength(envv);i+=2) {
|
|
char* key = NULL;
|
|
char* val = NULL;
|
|
key = nclistget(envv,i);
|
|
if(strcasecmp(key,"mode")==0) {
|
|
val = nclistget(envv,i+1);
|
|
nclistset(envv,i+1,strdup(newval));
|
|
nullfree(val);
|
|
return NC_NOERR;
|
|
}
|
|
}
|
|
return NC_EINVAL;
|
|
}
|
|
|
|
/*
 * Build a new list from a mode value by splitting it on commas.
 * A NULL modeval yields an empty list. The result of parseonchar() is
 * deliberately ignored. The list comes from nclistnew() and is handed
 * back to the caller.
 */
static NClist*
parsemode(const char* modeval)
{
    NClist* result = nclistnew();

    if(modeval != NULL) {
        /* split on commas */
        (void)parseonchar(modeval,',',result);
    }
    return result;
}
|
|
|
|
/**************************************************/
|
|
/**
|
|
* @internal Given an existing file, figure out its format and return
|
|
* that format value (NC_FORMATX_XXX) in model arg. Assume any path
|
|
* conversion was already performed at a higher level.
|
|
*
|
|
* @param path File name.
|
|
* @param flags
|
|
* @param use_parallel
|
|
* @param parameters
|
|
* @param model Pointer that gets the model to use for the dispatch table.
|
|
* @param version Pointer that gets version of the file.
|
|
*
|
|
* @return ::NC_NOERR No error.
|
|
* @author Dennis Heimbigner
|
|
*/
|
|
static int
|
|
check_file_type(const char *path, int omode, int use_parallel,
|
|
void *parameters, NCmodel* model, NCURI* uri)
|
|
{
|
|
char magic[NC_MAX_MAGIC_NUMBER_LEN];
|
|
int status = NC_NOERR;
|
|
struct MagicFile magicinfo;
|
|
#ifdef _WIN32
|
|
NC* nc = NULL;
|
|
#endif
|
|
|
|
memset((void*)&magicinfo,0,sizeof(magicinfo));
|
|
|
|
#ifdef _WIN32 /* including MINGW */
|
|
/* Windows does not handle well multiple handles to the same file.
|
|
So if file is already open/created, then find it and just get the
|
|
model from that. */
|
|
if((nc = find_in_NCList_by_name(path)) != NULL) {
|
|
int format = 0;
|
|
/* Get the model from this NC */
|
|
if((status = nc_inq_format_extended(nc->ext_ncid,&format,NULL))) goto done;
|
|
model->impl = format;
|
|
if((status = nc_inq_format(nc->ext_ncid,&format))) goto done;
|
|
model->format = format;
|
|
goto done;
|
|
}
|
|
#endif
|
|
|
|
magicinfo.path = path; /* do not free */
|
|
magicinfo.uri = uri; /* do not free */
|
|
magicinfo.omode = omode;
|
|
magicinfo.model = model; /* do not free */
|
|
magicinfo.parameters = parameters; /* do not free */
|
|
#ifdef USE_STDIO
|
|
magicinfo.use_parallel = 0;
|
|
#else
|
|
magicinfo.use_parallel = use_parallel;
|
|
#endif
|
|
|
|
if((status = openmagic(&magicinfo))) goto done;
|
|
|
|
/* Verify we have a large enough file */
|
|
if(magicinfo.filelen < (unsigned long long)MAGIC_NUMBER_LEN)
|
|
{status = NC_ENOTNC; goto done;}
|
|
if((status = readmagic(&magicinfo,0L,magic)) != NC_NOERR) {
|
|
status = NC_ENOTNC;
|
|
goto done;
|
|
}
|
|
|
|
/* Look at the magic number */
|
|
if(NC_interpret_magic_number(magic,model) == NC_NOERR
|
|
&& model->format != 0) {
|
|
if (use_parallel && (model->format == NC_FORMAT_NC3 || model->impl == NC_FORMATX_NC3))
|
|
/* this is called from nc_open_par() and file is classic */
|
|
model->impl = NC_FORMATX_PNETCDF;
|
|
goto done; /* found something */
|
|
}
|
|
|
|
/* Remaining case when implementation is an HDF5 file;
|
|
search forward at starting at 512
|
|
and doubling to see if we have HDF5 magic number */
|
|
{
|
|
long pos = 512L;
|
|
for(;;) {
|
|
if((pos+MAGIC_NUMBER_LEN) > magicinfo.filelen)
|
|
{status = NC_ENOTNC; goto done;}
|
|
if((status = readmagic(&magicinfo,pos,magic)) != NC_NOERR)
|
|
{status = NC_ENOTNC; goto done; }
|
|
NC_interpret_magic_number(magic,model);
|
|
if(model->impl == NC_FORMATX_NC4) break;
|
|
/* double and try again */
|
|
pos = 2*pos;
|
|
}
|
|
}
|
|
done:
|
|
closemagic(&magicinfo);
|
|
return check(status);
|
|
}
|
|
|
|
/**
|
|
\internal
|
|
\ingroup datasets
|
|
Provide open, read and close for use when searching for magic numbers
|
|
*/
|
|
static int
|
|
openmagic(struct MagicFile* file)
|
|
{
|
|
int status = NC_NOERR;
|
|
|
|
if(fIsSet(file->omode,NC_INMEMORY)) {
|
|
/* Get its length */
|
|
NC_memio* meminfo = (NC_memio*)file->parameters;
|
|
assert(meminfo != NULL);
|
|
file->filelen = (long long)meminfo->size;
|
|
#ifdef ENABLE_BYTERANGE
|
|
} else if(file->uri != NULL) {
|
|
#ifdef ENABLE_S3_SDK
|
|
/* If this is an S3 URL, then handle specially */
|
|
if(NC_iss3(file->uri)) {
|
|
if((status = NC_s3urlprocess(file->uri,&file->s3))) goto done;
|
|
if((file->s3client = NC_s3sdkcreateclient(&file->s3))==NULL) {status = NC_EURL; goto done;}
|
|
if((status = NC_s3sdkinfo(file->s3client,file->s3.bucket,file->s3.rootkey,&file->filelen,&file->errmsg)))
|
|
goto done;
|
|
file->iss3 = 1;
|
|
} else
|
|
#endif
|
|
{
|
|
/* Construct a URL minus any fragment */
|
|
file->curlurl = ncuribuild(file->uri,NULL,NULL,NCURISVC);
|
|
/* Open the curl handle */
|
|
if((status=nc_http_init(&file->state))) goto done;
|
|
if((status=nc_http_size(file->state,file->curlurl,&file->filelen))) goto done;
|
|
}
|
|
#endif /*BYTERANGE*/
|
|
} else {
|
|
#ifdef USE_PARALLEL
|
|
if (file->use_parallel) {
|
|
int retval;
|
|
MPI_Offset size;
|
|
assert(file->parameters != NULL);
|
|
if((retval = MPI_File_open(((NC_MPI_INFO*)file->parameters)->comm,
|
|
(char*)file->path,MPI_MODE_RDONLY,
|
|
((NC_MPI_INFO*)file->parameters)->info,
|
|
&file->fh)) != MPI_SUCCESS) {
|
|
#ifdef MPI_ERR_NO_SUCH_FILE
|
|
int errorclass;
|
|
MPI_Error_class(retval, &errorclass);
|
|
if (errorclass == MPI_ERR_NO_SUCH_FILE)
|
|
#ifdef NC_ENOENT
|
|
status = NC_ENOENT;
|
|
#else
|
|
status = errno;
|
|
#endif
|
|
else
|
|
#endif
|
|
status = NC_EPARINIT;
|
|
file->fh = MPI_FILE_NULL;
|
|
goto done;
|
|
}
|
|
/* Get its length */
|
|
if((retval=MPI_File_get_size(file->fh, &size)) != MPI_SUCCESS)
|
|
{status = NC_EPARINIT; goto done;}
|
|
file->filelen = (long long)size;
|
|
} else
|
|
#endif /* USE_PARALLEL */
|
|
{
|
|
if (file->path == NULL || strlen(file->path) == 0)
|
|
{status = NC_EINVAL; goto done;}
|
|
file->fp = NCfopen(file->path, "r");
|
|
if(file->fp == NULL)
|
|
{status = errno; goto done;}
|
|
/* Get its length */
|
|
{
|
|
int fd = fileno(file->fp);
|
|
#ifdef _WIN32
|
|
__int64 len64 = _filelengthi64(fd);
|
|
if(len64 < 0)
|
|
{status = errno; goto done;}
|
|
file->filelen = (long long)len64;
|
|
#else
|
|
off_t size;
|
|
size = lseek(fd, 0, SEEK_END);
|
|
if(size == -1)
|
|
{status = errno; goto done;}
|
|
file->filelen = (long long)size;
|
|
#endif
|
|
}
|
|
rewind(file->fp);
|
|
}
|
|
}
|
|
done:
|
|
return check(status);
|
|
}
|
|
|
|
static int
|
|
readmagic(struct MagicFile* file, long pos, char* magic)
|
|
{
|
|
int status = NC_NOERR;
|
|
NCbytes* buf = ncbytesnew();
|
|
|
|
memset(magic,0,MAGIC_NUMBER_LEN);
|
|
if(fIsSet(file->omode,NC_INMEMORY)) {
|
|
char* mempos;
|
|
NC_memio* meminfo = (NC_memio*)file->parameters;
|
|
if((pos + MAGIC_NUMBER_LEN) > meminfo->size)
|
|
{status = NC_EINMEMORY; goto done;}
|
|
mempos = ((char*)meminfo->memory) + pos;
|
|
memcpy((void*)magic,mempos,MAGIC_NUMBER_LEN);
|
|
#ifdef DEBUG
|
|
printmagic("XXX: readmagic",magic,file);
|
|
#endif
|
|
#ifdef ENABLE_BYTERANGE
|
|
} else if(file->uri != NULL) {
|
|
fileoffset_t start = (size_t)pos;
|
|
fileoffset_t count = MAGIC_NUMBER_LEN;
|
|
#ifdef ENABLE_S3_SDK
|
|
if(file->iss3) {
|
|
if((status = NC_s3sdkread(file->s3client,file->s3.bucket,file->s3.rootkey,start,count,(void*)magic,&file->errmsg)))
|
|
{goto done;}
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
status = nc_http_read(file->state, file->curlurl, start, count, buf);
|
|
if (status == NC_NOERR) {
|
|
if (ncbyteslength(buf) != count)
|
|
status = NC_EINVAL;
|
|
else
|
|
memcpy(magic, ncbytescontents(buf), count);
|
|
}
|
|
}
|
|
#endif
|
|
} else {
|
|
#ifdef USE_PARALLEL
|
|
if (file->use_parallel) {
|
|
MPI_Status mstatus;
|
|
int retval;
|
|
if((retval = MPI_File_read_at_all(file->fh, pos, magic,
|
|
MAGIC_NUMBER_LEN, MPI_CHAR, &mstatus)) != MPI_SUCCESS)
|
|
{status = NC_EPARINIT; goto done;}
|
|
}
|
|
else
|
|
#endif /* USE_PARALLEL */
|
|
{ /* Ordinary read */
|
|
long i;
|
|
i = fseek(file->fp, pos, SEEK_SET);
|
|
if (i < 0) { status = errno; goto done; }
|
|
ncbytessetlength(buf, 0);
|
|
if ((status = NC_readfileF(file->fp, buf, MAGIC_NUMBER_LEN))) goto done;
|
|
memcpy(magic, ncbytescontents(buf), MAGIC_NUMBER_LEN);
|
|
}
|
|
}
|
|
|
|
done:
|
|
ncbytesfree(buf);
|
|
if(file && file->fp) clearerr(file->fp);
|
|
return check(status);
|
|
}
|
|
|
|
/**
|
|
* Close the file opened to check for magic number.
|
|
*
|
|
* @param file pointer to the MagicFile struct for this open file.
|
|
* @returns NC_NOERR for success
|
|
* @returns NC_EPARINIT if there was a problem closing file with MPI
|
|
* (parallel builds only).
|
|
* @author Dennis Heimbigner
|
|
*/
|
|
static int
|
|
closemagic(struct MagicFile* file)
|
|
{
|
|
int status = NC_NOERR;
|
|
|
|
if(fIsSet(file->omode,NC_INMEMORY)) {
|
|
/* noop */
|
|
#ifdef ENABLE_BYTERANGE
|
|
} else if(file->uri != NULL) {
|
|
#ifdef ENABLE_S3_SDK
|
|
if(file->iss3) {
|
|
NC_s3sdkclose(file->s3client, &file->s3, 0, &file->errmsg);
|
|
NC_s3clear(&file->s3);
|
|
nullfree(file->errmsg);
|
|
} else
|
|
#endif
|
|
{
|
|
status = nc_http_close(file->state);
|
|
nullfree(file->curlurl);
|
|
}
|
|
#endif
|
|
} else {
|
|
#ifdef USE_PARALLEL
|
|
if (file->use_parallel) {
|
|
int retval;
|
|
if(file->fh != MPI_FILE_NULL
|
|
&& (retval = MPI_File_close(&file->fh)) != MPI_SUCCESS)
|
|
{status = NC_EPARINIT; return status;}
|
|
} else
|
|
#endif
|
|
{
|
|
if(file->fp) fclose(file->fp);
|
|
}
|
|
}
|
|
return status;
|
|
}
|
|
|
|
/*!
|
|
Interpret the magic number found in the header of a netCDF file.
|
|
This function interprets the magic number/string contained in the header of a netCDF file and sets the appropriate NC_FORMATX flags.
|
|
|
|
@param[in] magic Pointer to a character array with the magic number block.
|
|
@param[out] model Pointer to an integer to hold the corresponding netCDF type.
|
|
@param[out] version Pointer to an integer to hold the corresponding netCDF version.
|
|
@returns NC_NOERR if a legitimate file type found
|
|
@returns NC_ENOTNC otherwise
|
|
|
|
\internal
|
|
\ingroup datasets
|
|
|
|
*/
|
|
static int
|
|
NC_interpret_magic_number(char* magic, NCmodel* model)
|
|
{
|
|
int status = NC_NOERR;
|
|
/* Look at the magic number */
|
|
#ifdef USE_NETCDF4
|
|
if (strlen(UDF0_magic_number) && !strncmp(UDF0_magic_number, magic,
|
|
strlen(UDF0_magic_number)))
|
|
{
|
|
model->impl = NC_FORMATX_UDF0;
|
|
model->format = NC_FORMAT_NETCDF4;
|
|
goto done;
|
|
}
|
|
if (strlen(UDF1_magic_number) && !strncmp(UDF1_magic_number, magic,
|
|
strlen(UDF1_magic_number)))
|
|
{
|
|
model->impl = NC_FORMATX_UDF1;
|
|
model->format = NC_FORMAT_NETCDF4;
|
|
goto done;
|
|
}
|
|
#endif /* USE_NETCDF4 */
|
|
|
|
/* Use the complete magic number string for HDF5 */
|
|
if(memcmp(magic,HDF5_SIGNATURE,sizeof(HDF5_SIGNATURE))==0) {
|
|
model->impl = NC_FORMATX_NC4;
|
|
model->format = NC_FORMAT_NETCDF4;
|
|
goto done;
|
|
}
|
|
if(magic[0] == '\016' && magic[1] == '\003'
|
|
&& magic[2] == '\023' && magic[3] == '\001') {
|
|
model->impl = NC_FORMATX_NC_HDF4;
|
|
model->format = NC_FORMAT_NETCDF4;
|
|
goto done;
|
|
}
|
|
if(magic[0] == 'C' && magic[1] == 'D' && magic[2] == 'F') {
|
|
if(magic[3] == '\001') {
|
|
model->impl = NC_FORMATX_NC3;
|
|
model->format = NC_FORMAT_CLASSIC;
|
|
goto done;
|
|
}
|
|
if(magic[3] == '\002') {
|
|
model->impl = NC_FORMATX_NC3;
|
|
model->format = NC_FORMAT_64BIT_OFFSET;
|
|
goto done;
|
|
}
|
|
if(magic[3] == '\005') {
|
|
model->impl = NC_FORMATX_NC3;
|
|
model->format = NC_FORMAT_64BIT_DATA;
|
|
goto done;
|
|
}
|
|
}
|
|
/* No match */
|
|
status = NC_ENOTNC;
|
|
goto done;
|
|
|
|
done:
|
|
return check(status);
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
static void
|
|
printmagic(const char* tag, char* magic, struct MagicFile* f)
|
|
{
|
|
int i;
|
|
fprintf(stderr,"%s: ispar=%d magic=",tag,f->use_parallel);
|
|
for(i=0;i<MAGIC_NUMBER_LEN;i++) {
|
|
unsigned int c = (unsigned int)magic[i];
|
|
c = c & 0x000000FF;
|
|
if(c == '\n')
|
|
fprintf(stderr," 0x%0x/'\\n'",c);
|
|
else if(c == '\r')
|
|
fprintf(stderr," 0x%0x/'\\r'",c);
|
|
else if(c < ' ')
|
|
fprintf(stderr," 0x%0x/'?'",c);
|
|
else
|
|
fprintf(stderr," 0x%0x/'%c'",c,c);
|
|
}
|
|
fprintf(stderr,"\n");
|
|
fflush(stderr);
|
|
}
|
|
|
|
static void
|
|
printlist(NClist* list, const char* tag)
|
|
{
|
|
int i;
|
|
fprintf(stderr,"%s:",tag);
|
|
for(i=0;i<nclistlength(list);i++)
|
|
fprintf(stderr," %s",(char*)nclistget(list,i));
|
|
fprintf(stderr,"\n");
|
|
dbgflush();
|
|
}
|
|
|
|
|
|
#endif
|