2010-06-03 21:24:43 +08:00
|
|
|
/*********************************************************************
|
2018-12-07 05:29:57 +08:00
|
|
|
Copyright 2018, UCAR/Unidata See netcdf/COPYRIGHT file for
|
2010-06-03 21:24:43 +08:00
|
|
|
copying and redistribution conditions.
|
2019-08-09 22:45:39 +08:00
|
|
|
*********************************************************************/
|
2019-08-09 22:48:28 +08:00
|
|
|
/**
|
|
|
|
* @file
|
|
|
|
*
|
|
|
|
* Functions to manage the list of NC structs. There is one NC struct
|
|
|
|
* for each open file.
|
2019-08-09 23:15:59 +08:00
|
|
|
*
|
|
|
|
* @author Dennis Heimbigner
|
2019-08-09 22:48:28 +08:00
|
|
|
*/
|
2010-06-03 21:24:43 +08:00
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
#include "config.h"
|
2010-06-03 21:24:43 +08:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <assert.h>
|
Provide byte-range reading of remote datasets
re: issue https://github.com/Unidata/netcdf-c/issues/1251
Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.
This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.
Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.
Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.
An additional goal here is to gain some experience with
the Amazon S3 REST protocol.
This architecture and its use are documented in
the file docs/byterange.dox.
There are currently two test cases:
1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
datasets.
This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).
1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs
Other changes:
1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
fragment tag with a more general mode= tag.
Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.
2019-01-02 09:27:36 +08:00
|
|
|
#include "ncdispatch.h"
|
2010-06-03 21:24:43 +08:00
|
|
|
|
2019-08-09 23:15:59 +08:00
|
|
|
/** This shift is applied to the ext_ncid in order to get the index in
|
|
|
|
* the array of NC. */
|
2010-06-03 21:24:43 +08:00
|
|
|
#define ID_SHIFT (16)
|
2019-08-09 23:15:59 +08:00
|
|
|
|
|
|
|
/** This is the length of the NC list - the number of files that can
|
|
|
|
* be open at one time. We use 2^16 = 65536 entries in the array, but
|
|
|
|
* slot 0 is not used, so only 65535 files may be open at one
|
2019-08-13 19:45:03 +08:00
|
|
|
* time. */
|
2010-06-03 21:24:43 +08:00
|
|
|
#define NCFILELISTLENGTH 0x10000
|
2016-05-04 11:17:06 +08:00
|
|
|
|
2019-08-09 23:15:59 +08:00
|
|
|
/** This is the pointer to the array of NC, one for each open file. */
|
2010-06-03 21:24:43 +08:00
|
|
|
static NC** nc_filelist = NULL;
|
|
|
|
|
2019-08-09 23:15:59 +08:00
|
|
|
/** The number of files currently open. */
|
2010-06-03 21:24:43 +08:00
|
|
|
static int numfiles = 0;
|
|
|
|
|
2019-08-09 22:48:28 +08:00
|
|
|
/**
|
2019-08-09 23:15:59 +08:00
|
|
|
* How many files are currently open?
|
2019-08-09 22:48:28 +08:00
|
|
|
*
|
2019-08-09 23:15:59 +08:00
|
|
|
* @return number of open files.
|
2019-08-09 22:48:28 +08:00
|
|
|
* @author Dennis Heimbigner
|
|
|
|
*/
|
2010-06-03 21:24:43 +08:00
|
|
|
int
|
|
|
|
count_NCList(void)
|
|
|
|
{
|
|
|
|
return numfiles;
|
|
|
|
}
|
|
|
|
|
2019-08-09 22:48:28 +08:00
|
|
|
/**
|
2019-08-14 00:55:44 +08:00
|
|
|
* Free an empty NCList. @note If list is not empty, or has not been
|
|
|
|
* allocated, function will silently exit.
|
2019-08-09 22:48:28 +08:00
|
|
|
*
|
|
|
|
* @author Dennis Heimbigner
|
|
|
|
*/
|
2010-06-03 21:24:43 +08:00
|
|
|
void
|
|
|
|
free_NCList(void)
|
|
|
|
{
|
|
|
|
if(numfiles > 0) return; /* not empty */
|
|
|
|
if(nc_filelist != NULL) free(nc_filelist);
|
|
|
|
nc_filelist = NULL;
|
|
|
|
}
|
|
|
|
|
2019-08-09 22:48:28 +08:00
|
|
|
/**
|
2019-08-09 23:15:59 +08:00
|
|
|
* Add an already-allocated NC to the list. It will be assigned an
|
|
|
|
* ncid in this function.
|
|
|
|
*
|
|
|
|
* If this is the first file to be opened, the nc_filelist will be
|
|
|
|
* allocated and set to all 0.
|
|
|
|
*
|
|
|
|
* The ncid is assigned by finding the first open index in the
|
|
|
|
* nc_filelist array (skipping index 0). The ncid is this index
|
2019-08-14 01:06:06 +08:00
|
|
|
* left-shifted ID_SHIFT bits (16). This puts the file ID in the first
|
|
|
|
* two bytes of the 4-byte integer, and leaves the last two bytes for
|
2019-08-09 23:15:59 +08:00
|
|
|
* group IDs for netCDF-4 files.
|
|
|
|
*
|
|
|
|
* @param ncp Pointer to already-allocated and initialized NC struct.
|
2019-08-09 22:48:28 +08:00
|
|
|
*
|
2019-08-09 23:15:59 +08:00
|
|
|
* @return ::NC_NOERR No error.
|
|
|
|
* @return ::NC_ENOMEM Out of memory.
|
2019-08-09 22:48:28 +08:00
|
|
|
* @author Dennis Heimbigner
|
|
|
|
*/
|
2010-06-03 21:24:43 +08:00
|
|
|
int
|
|
|
|
add_to_NCList(NC* ncp)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int new_id;
|
|
|
|
if(nc_filelist == NULL) {
|
2019-08-09 22:45:39 +08:00
|
|
|
if (!(nc_filelist = calloc(1, sizeof(NC*)*NCFILELISTLENGTH)))
|
|
|
|
return NC_ENOMEM;
|
|
|
|
numfiles = 0;
|
2010-06-03 21:24:43 +08:00
|
|
|
}
|
2014-03-08 03:04:38 +08:00
|
|
|
|
2010-06-03 21:24:43 +08:00
|
|
|
new_id = 0; /* id's begin at 1 */
|
2013-06-26 01:04:10 +08:00
|
|
|
for(i=1; i < NCFILELISTLENGTH; i++) {
|
2019-08-09 22:45:39 +08:00
|
|
|
if(nc_filelist[i] == NULL) {new_id = i; break;}
|
2010-06-03 21:24:43 +08:00
|
|
|
}
|
|
|
|
if(new_id == 0) return NC_ENOMEM; /* no more slots */
|
|
|
|
nc_filelist[new_id] = ncp;
|
|
|
|
numfiles++;
|
2019-08-14 01:06:06 +08:00
|
|
|
ncp->ext_ncid = (new_id << ID_SHIFT);
|
2010-06-03 21:24:43 +08:00
|
|
|
return NC_NOERR;
|
|
|
|
}
|
|
|
|
|
2019-09-17 01:28:18 +08:00
|
|
|
/**
|
|
|
|
* Move an NC in the nc_filelist. This is required by PIO.
|
|
|
|
*
|
|
|
|
* @param ncp Pointer to already-allocated and initialized NC struct.
|
|
|
|
* @param new_id New index in the nc_filelist for this file.
|
|
|
|
*
|
|
|
|
* @return ::NC_NOERR No error.
|
|
|
|
* @return ::NC_EINVAL Invalid input.
|
|
|
|
* @author Ed Hartnett
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
move_in_NCList(NC *ncp, int new_id)
|
|
|
|
{
|
|
|
|
/* If no files in list, error. */
|
|
|
|
if (!nc_filelist)
|
|
|
|
return NC_EINVAL;
|
|
|
|
|
|
|
|
/* If new slot is already taken, error. */
|
|
|
|
if (nc_filelist[new_id])
|
|
|
|
return NC_EINVAL;
|
|
|
|
|
|
|
|
/* Move the file. */
|
|
|
|
nc_filelist[ncp->ext_ncid >> ID_SHIFT] = NULL;
|
|
|
|
nc_filelist[new_id] = ncp;
|
|
|
|
ncp->ext_ncid = (new_id << ID_SHIFT);
|
|
|
|
|
|
|
|
return NC_NOERR;
|
|
|
|
}
|
|
|
|
|
2019-08-09 22:48:28 +08:00
|
|
|
/**
|
2019-08-09 23:15:59 +08:00
|
|
|
* Delete an NC struct from the list. This happens when the file is
|
2019-08-13 19:45:03 +08:00
|
|
|
* closed. Relies on all memory in the NC being deallocated after this
|
|
|
|
* function with freeNC().
|
2019-08-09 23:15:59 +08:00
|
|
|
*
|
|
|
|
* @note If the file list is empty, or this NC can't be found in the
|
|
|
|
* list, this function will silently exit.
|
|
|
|
*
|
|
|
|
* @param ncp Pointer to NC to be removed from list.
|
2019-08-09 22:48:28 +08:00
|
|
|
*
|
|
|
|
* @author Dennis Heimbigner
|
|
|
|
*/
|
2010-06-03 21:24:43 +08:00
|
|
|
void
|
|
|
|
del_from_NCList(NC* ncp)
|
|
|
|
{
|
2019-08-09 22:45:39 +08:00
|
|
|
unsigned int ncid = ((unsigned int)ncp->ext_ncid) >> ID_SHIFT;
|
|
|
|
if(numfiles == 0 || ncid == 0 || nc_filelist == NULL) return;
|
|
|
|
if(nc_filelist[ncid] != ncp) return;
|
2014-03-08 03:04:38 +08:00
|
|
|
|
2019-08-09 22:45:39 +08:00
|
|
|
nc_filelist[ncid] = NULL;
|
|
|
|
numfiles--;
|
2010-10-14 06:53:25 +08:00
|
|
|
|
2019-08-09 22:45:39 +08:00
|
|
|
/* If all files have been closed, release the filelist memory. */
|
|
|
|
if (numfiles == 0)
|
|
|
|
free_NCList();
|
2010-06-03 21:24:43 +08:00
|
|
|
}
|
|
|
|
|
2019-08-09 22:48:28 +08:00
|
|
|
/**
|
2019-08-13 20:31:06 +08:00
|
|
|
* Find an NC in the list, given an ext_ncid. The NC list is indexed
|
|
|
|
* with the first two bytes of ext_ncid. (The last two bytes specify
|
|
|
|
* the group for netCDF4 files, or are zeros for classic files.)
|
2019-08-09 23:15:59 +08:00
|
|
|
*
|
|
|
|
* @param ext_ncid The ncid of the file to find.
|
2019-08-09 22:48:28 +08:00
|
|
|
*
|
2019-08-09 23:15:59 +08:00
|
|
|
* @return pointer to NC or NULL if not found.
|
2019-08-13 20:03:48 +08:00
|
|
|
* @author Dennis Heimbigner, Ed Hartnett
|
2019-08-09 22:48:28 +08:00
|
|
|
*/
|
2010-08-18 23:11:17 +08:00
|
|
|
NC *
|
2010-06-03 21:24:43 +08:00
|
|
|
find_in_NCList(int ext_ncid)
|
|
|
|
{
|
2019-08-09 22:45:39 +08:00
|
|
|
NC* f = NULL;
|
2019-09-30 02:59:28 +08:00
|
|
|
|
2019-08-13 20:31:06 +08:00
|
|
|
/* Discard the first two bytes of ext_ncid to get ncid. */
|
2019-08-09 22:45:39 +08:00
|
|
|
unsigned int ncid = ((unsigned int)ext_ncid) >> ID_SHIFT;
|
2019-08-13 20:31:06 +08:00
|
|
|
|
|
|
|
/* If we have a filelist, there will be an entry, possibly NULL,
|
|
|
|
* for this ncid. */
|
2019-08-13 20:03:48 +08:00
|
|
|
if (nc_filelist)
|
|
|
|
{
|
|
|
|
assert(numfiles);
|
2019-08-09 22:45:39 +08:00
|
|
|
f = nc_filelist[ncid];
|
2019-08-13 20:03:48 +08:00
|
|
|
}
|
2018-01-25 14:03:19 +08:00
|
|
|
|
2019-08-13 20:31:06 +08:00
|
|
|
/* For classic files, ext_ncid must be a multiple of
|
|
|
|
* (1<<ID_SHIFT). That is, the group part of the ext_ncid (the
|
|
|
|
* last two bytes) must be zero. If not, then return NULL, which
|
|
|
|
* will eventually lead to an NC_EBADID error being returned to
|
|
|
|
* user. */
|
2019-09-30 02:59:28 +08:00
|
|
|
if (f != NULL && f->dispatch != NULL
|
|
|
|
&& f->dispatch->model == NC_FORMATX_NC3 && (ext_ncid % (1<<ID_SHIFT)))
|
2019-08-09 22:45:39 +08:00
|
|
|
return NULL;
|
2018-01-25 14:03:19 +08:00
|
|
|
|
2019-08-09 22:45:39 +08:00
|
|
|
return f;
|
2010-06-03 21:24:43 +08:00
|
|
|
}
|
|
|
|
|
2019-08-09 22:48:28 +08:00
|
|
|
/**
|
2019-08-09 23:15:59 +08:00
|
|
|
* Find an NC in the list using the file name.
|
|
|
|
*
|
|
|
|
* @param path Name of the file.
|
2019-08-09 22:48:28 +08:00
|
|
|
*
|
2019-08-09 23:15:59 +08:00
|
|
|
* @return pointer to NC or NULL if not found.
|
2019-08-09 22:48:28 +08:00
|
|
|
* @author Dennis Heimbigner
|
|
|
|
*/
|
2014-03-08 03:04:38 +08:00
|
|
|
NC*
|
|
|
|
find_in_NCList_by_name(const char* path)
|
|
|
|
{
|
2019-08-09 22:45:39 +08:00
|
|
|
int i;
|
|
|
|
NC* f = NULL;
|
|
|
|
if(nc_filelist == NULL)
|
|
|
|
return NULL;
|
|
|
|
for(i=1; i < NCFILELISTLENGTH; i++) {
|
|
|
|
if(nc_filelist[i] != NULL) {
|
|
|
|
if(strcmp(nc_filelist[i]->path,path)==0) {
|
|
|
|
f = nc_filelist[i];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return f;
|
2014-03-08 03:04:38 +08:00
|
|
|
}
|
|
|
|
|
2019-08-09 22:48:28 +08:00
|
|
|
/**
|
2019-08-09 23:15:59 +08:00
|
|
|
* Find an NC in list based on its index. The index is ((unsigned
|
2019-08-14 04:57:43 +08:00
|
|
|
* int)ext_ncid) >> ID_SHIFT. This is the two high bytes of the
|
|
|
|
* ext_ncid. (The other two bytes are used for the group ID for
|
|
|
|
* netCDF-4 files.)
|
2019-08-09 23:15:59 +08:00
|
|
|
*
|
|
|
|
* @param index The index in the NC list.
|
2019-09-18 10:27:43 +08:00
|
|
|
* @param ncp Pointer that gets pointer to the next NC. Ignored if
|
2019-08-09 23:15:59 +08:00
|
|
|
* NULL.
|
2019-08-09 22:48:28 +08:00
|
|
|
*
|
2019-08-09 23:15:59 +08:00
|
|
|
* @return ::NC_NOERR No error.
|
|
|
|
* @return ::NC_ERANGE Index out of range.
|
2019-08-09 22:48:28 +08:00
|
|
|
* @author Dennis Heimbigner
|
|
|
|
*/
|
2016-05-04 11:17:06 +08:00
|
|
|
int
|
|
|
|
iterate_NCList(int index, NC** ncp)
|
|
|
|
{
|
|
|
|
/* Walk from 0 ...; 0 return => stop */
|
|
|
|
if(index < 0 || index >= NCFILELISTLENGTH)
|
2019-08-09 22:45:39 +08:00
|
|
|
return NC_ERANGE;
|
2016-05-04 11:17:06 +08:00
|
|
|
if(ncp) *ncp = nc_filelist[index];
|
|
|
|
return NC_NOERR;
|
|
|
|
}
|