netcdf-c/libhdf5/H5FDhttp.c

/*********************************************************************
*    Copyright 2018, UCAR/Unidata
*    See netcdf/COPYRIGHT file for copying and redistribution conditions.
* ********************************************************************/

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.                                               *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the COPYING file, which can be found at the root of the source code       *
 * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
 * If you do not have access to either file, you may request a copy from     *
 * help@hdfgroup.org.                                                        *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/* Programmer:  Dennis Heimbigner dmh@ucar.edu
 *
 * Purpose:  Access remote datasets using byte range requests.
 * Derived from the HDF5 H5FDstdio.c file.
 *
 * NOTE:    This driver is not as well tested as the standard SEC2 driver
 *          and is not intended for production use!
 */

#include "config.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#include <hdf5.h>
#include <curl/curl.h>

#ifdef H5_HAVE_FLOCK
/* Needed for lock type definitions (e.g., LOCK_EX) */
#include <sys/file.h>
#endif /* H5_HAVE_FLOCK */

#ifdef H5_HAVE_UNISTD_H
#include <unistd.h>
#endif

/*
 * Select which layout of H5FD_class_t this file is compiled against.
 * H5FDCLASS1 is defined when the HDF5 headers report major version 1 with a
 * minor version below 10; prototypes and definitions later in this file
 * switch on it.  Any major version other than 1 stops the build with an
 * explicit error.
 */

#if H5_VERS_MAJOR == 1
#if H5_VERS_MINOR < 10
#define H5FDCLASS1 1
#endif
#else
#error "Cannot determine version of H5FD_class_t"
#endif

#ifdef H5_HAVE_WIN32_API
/* The following two defines must be before any windows headers are included */
#define WIN32_LEAN_AND_MEAN    /* Exclude rarely-used stuff from Windows headers */
#define NOGDI                  /* Exclude Graphic Display Interface macros */

#include <windows.h>
#include <io.h>

#endif /* H5_HAVE_WIN32_API */

#include "netcdf.h"
#include "ncbytes.h"
#include "nclist.h"
#include "nchttp.h"

#include "H5FDhttp.h"

/* Signed offset type whose width determines MAXADDR */
typedef off_t file_offset_t;

/* The driver identification number; 0 until H5FD_http_init() registers the
 * driver, and reset to 0 by H5FD_http_finalize() */
static hid_t H5FD_HTTP_g = 0;

/* Kind of the most recent operation, as recorded in H5FD_http_t.op */
typedef enum {
    H5FD_HTTP_OP_UNKNOWN=0,
    H5FD_HTTP_OP_READ=1,
    H5FD_HTTP_OP_WRITE=2,
    H5FD_HTTP_OP_SEEK=3
} H5FD_http_file_op;

/* The description of a file belonging to this driver. 'eoa' is the end of
 * the HDF5 address space in use and 'eof' is the size of the remote object.
 * H5FD_http_open() zero-fills the structure (so 'eoa' starts at zero), then
 * sets 'eof' to the length reported by nc_http_size(), 'pos' to HADDR_UNDEF,
 * 'op' to H5FD_HTTP_OP_SEEK and 'write_access' to 0.  H5FD_http_read()
 * resets 'pos' to HADDR_UNDEF and 'op' to H5FD_HTTP_OP_UNKNOWN when the
 * HTTP request fails.
 */
typedef struct H5FD_http_t {
    H5FD_t      pub;            /* public stuff, must be first      */
    haddr_t     eoa;            /* end of allocated region          */
    haddr_t     eof;            /* end of file; size of the remote object */
    haddr_t     pos;            /* offset of the most recent read request */
    unsigned    write_access;   /* Flag to indicate the file was opened with write access */
    H5FD_http_file_op op;	/* last operation */
    NC_HTTP_STATE*  state;       /* HTTP state returned by nc_http_open() */
    char*           url;        /* Private copy of the name passed to H5FD_http_open() */ 
} H5FD_http_t;


/* These macros check for overflow of various quantities.  These macros
 * assume that file_offset_t is signed and haddr_t and size_t are unsigned.
 *
 * MAXADDR:  All bits of file_offset_t except the top (sign) bit, i.e. the
 *      largest address this driver accepts (assumes 8-bit bytes).
 *
 * ADDR_OVERFLOW:  Checks whether a file address of type `haddr_t'
 *      is HADDR_UNDEF or has any bit set above MAXADDR.
 *
 * SIZE_OVERFLOW:  Checks whether a size has any bit set above MAXADDR.
 *      Note that the limit is MAXADDR, not the range of `size_t'.
 *
 * REGION_OVERFLOW:  Checks whether an address and size pair describe data
 *      whose start, length and end all stay within MAXADDR without
 *      wrapping around.
 */
#define MAXADDR (((haddr_t)1<<(8*sizeof(file_offset_t)-1))-1)
#define ADDR_OVERFLOW(A)  (HADDR_UNDEF==(A) || ((A) & ~(haddr_t)MAXADDR))
#define SIZE_OVERFLOW(Z)  ((Z) & ~(hsize_t)MAXADDR)
#define REGION_OVERFLOW(A,Z)  (ADDR_OVERFLOW(A) || SIZE_OVERFLOW(Z) || \
    HADDR_UNDEF==(A)+(Z) || (file_offset_t)((A)+(Z))<(file_offset_t)(A))

/* Prototypes of the driver callbacks installed in the H5FD_http_g class table */
static H5FD_t *H5FD_http_open(const char *name, unsigned flags,
                 hid_t fapl_id, haddr_t maxaddr);
static herr_t H5FD_http_close(H5FD_t *lf);
static int H5FD_http_cmp(const H5FD_t *_f1, const H5FD_t *_f2);
static herr_t H5FD_http_query(const H5FD_t *_f1, unsigned long *flags);
static haddr_t H5FD_http_alloc(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, hsize_t size);
static haddr_t H5FD_http_get_eoa(const H5FD_t *_file, H5FD_mem_t type);
static herr_t H5FD_http_set_eoa(H5FD_t *_file, H5FD_mem_t type, haddr_t addr);
static herr_t  H5FD_http_get_handle(H5FD_t *_file, hid_t fapl, void** file_handle);
static herr_t H5FD_http_read(H5FD_t *lf, H5FD_mem_t type, hid_t fapl_id, haddr_t addr,
                size_t size, void *buf);
static herr_t H5FD_http_write(H5FD_t *lf, H5FD_mem_t type, hid_t fapl_id, haddr_t addr,
                size_t size, const void *buf);

/* The H5FD_class_t structure has different versions; the callbacks below
 * change signature (and H5FD_http_term only exists) depending on H5FDCLASS1 */
#ifdef H5FDCLASS1
static haddr_t H5FD_http_get_eof(const H5FD_t *_file);
static herr_t H5FD_http_flush(H5FD_t *_file, hid_t dxpl_id, unsigned closing);
static herr_t H5FD_http_lock(H5FD_t *_file, unsigned char* old, unsigned lock_type, hbool_t last);
static herr_t H5FD_http_unlock(H5FD_t *file, unsigned char *oid, hbool_t last);
#else
static herr_t H5FD_http_term(void);
static haddr_t H5FD_http_get_eof(const H5FD_t *_file, H5FD_mem_t type);
static herr_t H5FD_http_flush(H5FD_t *_file, hid_t dxpl_id, hbool_t closing);
static herr_t H5FD_http_lock(H5FD_t *_file, hbool_t rw);
static herr_t H5FD_http_unlock(H5FD_t *_file);
#endif

/* The class table handed to H5FDregister() by H5FD_http_init().
 * Beware, not same as H5FD_HTTP_g (the registered driver id).
 * The set of members depends on the HDF5 version: the leading version/value
 * pair, the vector/selection I/O slots and the del/ctl slots are only
 * emitted for HDF5 >= 1.13.2, and the terminate slot only when H5FDCLASS1
 * is not defined. */
static const H5FD_class_t H5FD_http_g = {
#if H5_VERSION_GE(1,13,2)
    H5FD_CLASS_VERSION,		/* struct version  */
    H5_VFD_HTTP,		/* value           */
#endif
    "http",			/* name         */
    MAXADDR,			/* maxaddr      */
    H5F_CLOSE_WEAK,		/* fc_degree    */
#ifndef H5FDCLASS1
    H5FD_http_term,		/* terminate    */
#endif
    NULL,			/* sb_size      */
    NULL,			/* sb_encode    */
    NULL,			/* sb_decode    */
    0,				/* fapl_size    */
    NULL,			/* fapl_get     */
    NULL,			/* fapl_copy    */
    NULL,			/* fapl_free    */
    0,				/* dxpl_size    */
    NULL,			/* dxpl_copy    */
    NULL,			/* dxpl_free    */
    H5FD_http_open,		/* open         */
    H5FD_http_close,		/* close        */
    H5FD_http_cmp,		/* cmp          */
    H5FD_http_query,		/* query        */
    NULL,			/* get_type_map */
    H5FD_http_alloc,		/* alloc        */
    NULL,			/* free         */
    H5FD_http_get_eoa,		/* get_eoa      */
    H5FD_http_set_eoa,		/* set_eoa      */
    H5FD_http_get_eof,		/* get_eof      */
    H5FD_http_get_handle,	/* get_handle   */
    H5FD_http_read,		/* read         */
    H5FD_http_write,		/* write        */
#if H5_VERSION_GE(1,13,2)
    NULL,			/* read_vector     */
    NULL,			/* write_vector    */
    NULL,			/* read_selection  */
    NULL,			/* write_selection */
#endif
    H5FD_http_flush,		/* flush        */
    NULL,			/* truncate     */
    H5FD_http_lock,		/* lock         */
    H5FD_http_unlock,		/* unlock       */
#if H5_VERSION_GE(1,13,2)
    NULL,			/* del          */
    NULL,			/* ctl	        */
#endif
    H5FD_FLMAP_DICHOTOMY	/* fl_map       */
};


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_init
 *
 * Purpose:  Register this driver with the HDF5 library, unless the cached
 *    driver id already identifies a registered virtual file driver.
 *
 * Return:  The cached driver id: either the id that was already
 *    registered or whatever H5FDregister() just returned.
 *
 * Programmer:  Robb Matzke
 *              Thursday, July 29, 1999
 *
 *-------------------------------------------------------------------------
 */
EXTERNL hid_t
H5FD_http_init(void)
{
    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    /* Already registered: hand back the cached id unchanged */
    if (H5Iget_type(H5FD_HTTP_g) == H5I_VFL)
        return H5FD_HTTP_g;

    H5FD_HTTP_g = H5FDregister(&H5FD_http_g);
    return H5FD_HTTP_g;
} /* end H5FD_http_init() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_finalize
 *
 * Purpose:  Unregister this driver from the library (if it was ever
 *    registered) and forget the cached driver id.
 *
 * Returns:     The cached driver id after the reset, which is always 0.
 *              The result of H5FDunregister() is not inspected.
 *
 * Programmer:  John Donoghue
 *              Tuesday, December 12, 2023
 *
 *-------------------------------------------------------------------------
 */
EXTERNL hid_t
H5FD_http_finalize(void)
{
    /* Only a non-zero id needs unregistering and resetting */
    if (H5FD_HTTP_g != 0) {
        H5FDunregister(H5FD_HTTP_g);
        H5FD_HTTP_g = 0;
    }

    return H5FD_HTTP_g;
} /* end H5FD_http_finalize() */


/*---------------------------------------------------------------------------
 * Function:  H5FD_http_term
 *
 * Purpose:  Terminate callback of the class table.  It performs no work;
 *    it is only compiled when H5FDCLASS1 is not defined.
 *
 * Returns:     Always 0.
 *
 * Programmer:  Quincey Koziol
 *              Friday, Jan 30, 2004
 *
 *---------------------------------------------------------------------------
 */
#ifndef H5FDCLASS1
static herr_t
H5FD_http_term(void)
{
    const herr_t status = 0;    /* this callback cannot fail */

    return status;
} /* end H5FD_http_term() */
#endif


/*-------------------------------------------------------------------------
 * Function:  H5Pset_fapl_http
 *
 * Purpose:  Modify the file access property list to use the H5FD_HTTP
 *    driver defined in this source file.  There are no driver
 *    specific properties.
 *
 * Errors:
 *    PLIST  BADTYPE  FAPL_ID is not a file access property list, or
 *                    its class could not be determined.
 *
 * Return:  Non-negative on success/Negative on failure
 *
 * Programmer:  Robb Matzke
 *    Thursday, February 19, 1998
 *
 *-------------------------------------------------------------------------
 */
EXTERNL herr_t
H5Pset_fapl_http(hid_t fapl_id)
{
    static const char *func = "H5Pset_fapl_http";  /*for error reporting*/

    /*NO TRACE*/

    /* Clear the error stack */
    H5Eclear2(H5E_DEFAULT);

    /* H5Pisa_class() is tri-state: positive = member, zero = not a member,
     * negative = the query itself failed.  Only a positive answer may go on. */
    if(H5Pisa_class(fapl_id, H5P_FILE_ACCESS) <= 0)
        H5Epush_ret(func, H5E_ERR_CLS, H5E_PLIST, H5E_BADTYPE, "not a file access property list", -1);

    return H5Pset_driver(fapl_id, H5FD_HTTP, NULL);
} /* end H5Pset_fapl_http() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_open
 *
 * Purpose:  Opens a remote object as a read-only HDF5 file.  The object is
 *    opened with nc_http_open() and its length, obtained from
 *    nc_http_size(), becomes the end-of-file address.  FLAGS and
 *    FAPL_ID are ignored.
 *
 * Errors:
 *  ARGS      BADVALUE      NAME is NULL or empty.
 *  ARGS      BADRANGE      MAXADDR is zero or HADDR_UNDEF.
 *  ARGS      OVERFLOW      MAXADDR exceeds the driver's address range.
 *  IO        CANTOPENFILE  The object cannot be opened or sized.
 *  RESOURCE  NOSPACE       Memory allocation failed.
 *
 * Return:
 *      Success:    A pointer to a new file data structure. The
 *                  public fields will be initialized by the
 *                  caller, which is always H5FD_open().
 *
 *      Failure:    NULL.  Everything acquired after a successful
 *                  nc_http_open() is released before returning.
 *
 * Programmer:  Dennis Heimbigner
 *
 *-------------------------------------------------------------------------
 */
static H5FD_t *
H5FD_http_open( const char *name, unsigned flags, hid_t /*UNUSED*/ fapl_id,
    haddr_t maxaddr)
{
    static const char   *func = "H5FD_http_open";  /* Function Name for error reporting */
    H5FD_http_t *file = NULL;
    NC_HTTP_STATE* state = NULL;
    char *url = NULL;
    long long len = -1;
    size_t urlsize = 0;

    /* Sanity check on file offsets */
    assert(sizeof(file_offset_t) >= sizeof(size_t));

    /* Quiet compiler */
    (void)flags;
    (void)fapl_id;

    /* Clear the error stack */
    H5Eclear2(H5E_DEFAULT);

    /* Check arguments */
    if (!name || !*name)
        H5Epush_ret(func, H5E_ERR_CLS, H5E_ARGS, H5E_BADVALUE, "invalid URL", NULL);
    if (0 == maxaddr || HADDR_UNDEF == maxaddr)
        H5Epush_ret(func, H5E_ERR_CLS, H5E_ARGS, H5E_BADRANGE, "bogus maxaddr", NULL);
    if (ADDR_OVERFLOW(maxaddr))
        H5Epush_ret(func, H5E_ERR_CLS, H5E_ARGS, H5E_OVERFLOW, "maxaddr too large", NULL);

    /* Open the remote object, to check for existence and get its length */
    if(nc_http_open(name,&state)) {
        H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_CANTOPENFILE, "cannot access object", NULL);
    }

    /* From here on every failure path must close the HTTP state.  A
     * negative length is rejected because it cannot serve as an
     * end-of-file address. */
    if(nc_http_size(state,&len) || len < 0) {
        nc_http_close(state);
        H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_CANTOPENFILE, "cannot access object", NULL);
    }

    /* Acquire both allocations before touching the result so that a single
     * failure path can undo everything */
    urlsize = strlen(name) + 1;
    file = (H5FD_http_t *)H5allocate_memory(sizeof(H5FD_http_t),0);
    url = H5allocate_memory(urlsize,0);
    if(NULL == file || NULL == url) {
        if(file) H5free_memory(file);
        if(url) H5free_memory(url);
        nc_http_close(state);
        H5Epush_ret(func, H5E_ERR_CLS, H5E_RESOURCE, H5E_NOSPACE, "memory allocation failed", NULL);
    }
    memset(file,0,sizeof(H5FD_http_t));
    memcpy(url,name,urlsize);

    /* Build the return value; it takes ownership of state and url */
    file->op = H5FD_HTTP_OP_SEEK;
    file->pos = HADDR_UNDEF;
    file->write_access = 0;     /* Always read-only */
    file->eof = (haddr_t)len;
    file->state = state;
    file->url = url;

    return((H5FD_t*)file);
} /* end H5FD_http_open() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_close
 *
 * Purpose:  Closes a file: releases the HTTP state and the stored URL
 *    (when present) and then the file structure itself.
 *
 * Return:  Always 0.
 *
 * Programmer:  Dennis Heimbigner
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5FD_http_close(H5FD_t *_file)
{
    H5FD_http_t *http = (H5FD_http_t*)_file;

    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    /* Release what the structure owns, then the structure itself */
    if(http->state != NULL)
        nc_http_close(http->state);
    if(http->url != NULL)
        H5free_memory(http->url);
    H5free_memory(http);

    return 0;
} /* end H5FD_http_close() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_cmp
 *
 * Purpose:  Orders two files belonging to this driver by comparing their
 *    stored URLs with strcmp().
 *
 * Return:
 *      -1, 0 or 1 when the first URL sorts before, equal to or after
 *      the second one.
 *
 * Programmer:  Robb Matzke
 *              Thursday, July 29, 1999
 *
 *-------------------------------------------------------------------------
 */
static int
H5FD_http_cmp(const H5FD_t *_f1, const H5FD_t *_f2)
{
    const H5FD_http_t  *lhs = (const H5FD_http_t*)_f1;
    const H5FD_http_t  *rhs = (const H5FD_http_t*)_f2;
    int order;

    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    /* Normalize the strcmp() result to exactly -1, 0 or 1 */
    order = strcmp(lhs->url, rhs->url);
    return (order > 0) - (order < 0);
} /* H5FD_http_cmp() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_query
 *
 * Purpose:  Report the VFL feature flags (listed in H5FDpublic.h) that
 *    this driver advertises.  The file argument is not consulted.
 *
 * Return:  Always 0.  *FLAGS is written only when FLAGS is not NULL.
 *
 * Programmer:  Quincey Koziol
 *              Friday, August 25, 2000
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5FD_http_query(const H5FD_t *_f, unsigned long /*OUT*/ *flags)
{
    unsigned long features = 0;

    /* The answer does not depend on the file */
    (void)_f;

    /* Nothing to report into */
    if(flags == NULL)
        return 0;

    features |= H5FD_FEAT_AGGREGATE_METADATA;     /* OK to aggregate metadata allocations */
    features |= H5FD_FEAT_ACCUMULATE_METADATA;    /* OK to accumulate metadata */
    features |= H5FD_FEAT_DATA_SIEVE;             /* OK to perform data sieving */
    features |= H5FD_FEAT_AGGREGATE_SMALLDATA;    /* OK to aggregate "small" raw data allocations */
#ifndef H5FDCLASS1
    features |= H5FD_FEAT_DEFAULT_VFD_COMPATIBLE; /* File can be opened with the default VFD */
#endif

    *flags = features;
    return 0;
} /* end H5FD_http_query() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_alloc
 *
 * Purpose:     Hands out the current end-of-address marker as the start of
 *              a new block and advances the marker by SIZE.  TYPE and
 *              DXPL_ID are ignored; no range check is performed.
 *
 * Return:      The end-of-address marker as it was before the call.
 *
 * Programmer:  Raymond Lu
 *              30 March 2007
 *
 *-------------------------------------------------------------------------
 */
static haddr_t
H5FD_http_alloc(H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type, hid_t /*UNUSED*/ dxpl_id, hsize_t size)
{
    H5FD_http_t    *http = (H5FD_http_t*)_file;
    haddr_t         block_start;

    /* Unused arguments */
    (void)type;
    (void)dxpl_id;

    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    /* The new block begins where the allocated region used to end */
    block_start = http->eoa;
    http->eoa = block_start + size;

    return block_start;
} /* end H5FD_http_alloc() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_get_eoa
 *
 * Purpose:  Gets the end-of-address marker stored for the file.  TYPE is
 *           ignored.
 *
 * Return:  The stored end-of-address marker.
 *
 * Programmer:  Robb Matzke
 *              Monday, August  2, 1999
 *
 *-------------------------------------------------------------------------
 */
static haddr_t
H5FD_http_get_eoa(const H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type)
{
    /* Unused argument */
    (void)type;

    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    return ((const H5FD_http_t *)_file)->eoa;
} /* end H5FD_http_get_eoa() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_set_eoa
 *
 * Purpose:  Stores ADDR as the end-of-address marker of the file.  TYPE
 *    is ignored and ADDR is not validated.
 *
 * Return:  Always 0.
 *
 * Programmer:  Robb Matzke
 *              Thursday, July 29, 1999
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5FD_http_set_eoa(H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type, haddr_t addr)
{
    /* Unused argument */
    (void)type;

    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    ((H5FD_http_t*)_file)->eoa = addr;

    return 0;
} /* end H5FD_http_set_eoa() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_get_eof
 *
 * Purpose:  Returns the end-of-file marker stored for the file, i.e. the
 *    value recorded in the 'eof' field.  The TYPE argument, present
 *    only when H5FDCLASS1 is not defined, is ignored.
 *
 * Return:  The stored end-of-file address.
 *
 * Programmer:  Robb Matzke
 *              Thursday, July 29, 1999
 *
 *-------------------------------------------------------------------------
 */

static haddr_t
#ifdef H5FDCLASS1
H5FD_http_get_eof(const H5FD_t *_file)
#else
H5FD_http_get_eof(const H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type)
#endif
{
#ifndef H5FDCLASS1
    /* Unused argument */
    (void)type;
#endif

    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    return ((const H5FD_http_t *)_file)->eof;
} /* end H5FD_http_get_eof() */


/*-------------------------------------------------------------------------
 * Function:       H5FD_http_get_handle
 *
 * Purpose:        Stores the file's HTTP state pointer in *FILE_HANDLE.
 *                 FAPL is ignored.
 *
 * Returns:        0 when the stored pointer is non-NULL; otherwise an
 *                 error is pushed with -1 (and *FILE_HANDLE is NULL).
 *
 * Programmer:     Raymond Lu
 *                 Sept. 16, 2002
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5FD_http_get_handle(H5FD_t *_file, hid_t /*UNUSED*/ fapl, void **file_handle)
{
    static const char  *func = "H5FD_http_get_handle";  /* Function Name for error reporting */
    H5FD_http_t       *http = (H5FD_http_t *)_file;

    /* Unused argument */
    (void)fapl;

    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    /* The handle is published before it is checked */
    *file_handle = http->state;
    if(NULL == *file_handle)
        H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_WRITEERROR, "get handle failed", -1);

    return 0;
} /* end H5FD_http_get_handle() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_read
 *
 * Purpose:  Reads SIZE bytes beginning at address ADDR of the remote
 *    object into buffer BUF using one nc_http_read() request.  The
 *    part of the request at or beyond the stored end of file is
 *    filled with zeros instead of failing.  TYPE and DXPL_ID are
 *    ignored.
 *
 * Errors:
 *    IO    OVERFLOW   ADDR is undefined or ADDR/SIZE are out of range.
 *    IO    READERROR  The transfer buffer could not be created, the
 *                     request failed, or it delivered a different
 *                     number of bytes than requested.
 *
 * Return:  Non-negative on success/Negative on failure.  After a
 *    READERROR the recorded operation is H5FD_HTTP_OP_UNKNOWN and
 *    the recorded position is HADDR_UNDEF.
 *
 * Programmer:  Robb Matzke
 *    Wednesday, October 22, 1997
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5FD_http_read(H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type, hid_t /*UNUSED*/ dxpl_id,
    haddr_t addr, size_t size, void /*OUT*/ *buf)
{
    H5FD_http_t    *file = (H5FD_http_t*)_file;
    static const char *func = "H5FD_http_read";  /* Function Name for error reporting */
    NCbytes *bbuf = NULL;

    /* Quiet the compiler */
    (void)type;
    (void)dxpl_id;

    /* Clear the error stack */
    H5Eclear2(H5E_DEFAULT);

    /* Check for overflow */
    if (HADDR_UNDEF==addr)
        H5Epush_ret (func, H5E_ERR_CLS, H5E_IO, H5E_OVERFLOW, "file address overflowed", -1);
    if (REGION_OVERFLOW(addr, size))
        H5Epush_ret (func, H5E_ERR_CLS, H5E_IO, H5E_OVERFLOW, "file address overflowed", -1);

    /* Check easy cases */
    if (0 == size)
        return 0;
    if (addr >= file->eof) {
        /* Entirely past the end of the object: all zeros, no request */
        memset(buf, 0, size);
        return 0;
    }

    /* A byte-range request carries its own offset, so there is nothing to
     * seek; only the bookkeeping is updated. */
    file->pos = addr;

    /* Zero the tail that lies past the end of file and shrink the request
     * to the part that actually exists */
    if (addr + size > file->eof) {
        size_t nbytes = (size_t) (addr + size - file->eof);
        memset((unsigned char *)buf + size - nbytes, 0, nbytes);
        size -= nbytes;
    }

    /* The transfer buffer must exist before it is handed to the reader */
    bbuf = ncbytesnew();
    if (NULL == bbuf) {
        file->op = H5FD_HTTP_OP_UNKNOWN;
        file->pos = HADDR_UNDEF;
        H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_READERROR, "cannot allocate HTTP read buffer", -1);
    }

    if (nc_http_read(file->state,addr,size,bbuf)) {
        file->op = H5FD_HTTP_OP_UNKNOWN;
        file->pos = HADDR_UNDEF;
        ncbytesfree(bbuf);
        H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_READERROR, "HTTP byte-range read failed", -1);
    }

    /* Check that proper number of bytes was read; a short or long answer
     * is treated like any other failed read */
    if (ncbyteslength(bbuf) != size) {
        file->op = H5FD_HTTP_OP_UNKNOWN;
        file->pos = HADDR_UNDEF;
        ncbytesfree(bbuf);
        H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_READERROR, "HTTP byte-range read mismatch ", -1);
    }

    /* Copy the received bytes into the caller's buffer */
    memcpy(buf,ncbytescontents(bbuf),size);
    ncbytesfree(bbuf);

    /* Update the file position data. */
    file->op = H5FD_HTTP_OP_READ;
    file->pos = addr;

    return 0;
} /* end H5FD_http_read() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_write
 *
 * Purpose:  Write callback of a read-only driver: no data is written and
 *    a WRITEERROR is pushed on every call.  The arguments are not
 *    examined.
 *
 * Errors:
 *    IO    WRITEERROR  Always ("file is read-only").
 *
 * Return:  Non-negative on success/Negative on failure
 *
 * Programmer:  Dennis Heimbigner
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5FD_http_write(H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type, hid_t /*UNUSED*/ dxpl_id,
    haddr_t addr, size_t size, const void *buf)
{
    static const char *func = "H5FD_http_write";  /* Function Name for error reporting */

    /* Unused arguments */
    (void)type;
    (void)dxpl_id;

    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    /* Writing is never supported by this driver */
    H5Epush_ret (func, H5E_ERR_CLS, H5E_IO, H5E_WRITEERROR, "file is read-only", -1);

    return 0;
} /* end H5FD_http_write() */


/*-------------------------------------------------------------------------
 * Function:  H5FD_http_flush
 *
 * Purpose:  Flush callback.  It only clears the error stack; the
 *    arguments are not used.
 *
 * Return:  Always 0.
 *
 * Programmer:  Robb Matzke
 *    Wednesday, October 22, 1997
 *
 *-------------------------------------------------------------------------
 */
static herr_t
#ifdef H5FDCLASS1
H5FD_http_flush(H5FD_t *_file, hid_t dxpl_id, unsigned closing)
#else
H5FD_http_flush(H5FD_t *_file, hid_t /*UNUSED*/ dxpl_id, hbool_t closing)
#endif
{
    /* Unused argument (named identically in both signatures) */
    (void)dxpl_id;

    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    return 0;
} /* end H5FD_http_flush() */


/*-------------------------------------------------------------------------
 * Function:    H5FD_http_lock
 *
 * Purpose:     Lock callback.  It only clears the error stack; no lock is
 *              taken and the arguments are not used.
 *
 * Return:      Always 0.
 *
 * Programmer:  Vailin Choi; March 2015
 *
 *-------------------------------------------------------------------------
 */
static herr_t
#ifdef H5FDCLASS1
H5FD_http_lock(H5FD_t *_file, unsigned char* old, unsigned lock_type, hbool_t last)
#else
H5FD_http_lock(H5FD_t *_file, hbool_t rw)
#endif
{
    /* Unused arguments; their names depend on the signature in effect */
#ifdef H5FDCLASS1
    (void)lock_type;
    (void)last;
#else
    (void)rw;
#endif

    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    return 0;
} /* end H5FD_http_lock() */

/*-------------------------------------------------------------------------
 * Function:    H5FD_http_unlock
 *
 * Purpose:     Unlock callback.  It only clears the error stack; nothing
 *              is unlocked and the arguments are not used.
 *
 * Return:      Always 0.
 *
 * Programmer:  Vailin Choi; March 2015
 *
 *-------------------------------------------------------------------------
 */
static herr_t
#ifdef H5FDCLASS1
H5FD_http_unlock(H5FD_t *file, /*UNUSED*/unsigned char *oid, /*UNUSED*/ hbool_t last)
#else
H5FD_http_unlock(H5FD_t *_file)
#endif
{
    /* Unused arguments of the older signature */
#ifdef H5FDCLASS1
    (void)oid;
    (void)last;
#endif

    /* Start from an empty error stack */
    H5Eclear2(H5E_DEFAULT);

    return 0;
} /* end H5FD_http_unlock() */


#ifdef _H5private_H
/*
 * This is not related to the functionality of the driver code.
 * It is added here to trigger warning if HDF5 private definitions are included
 * by mistake.  The code should use only HDF5 public API and definitions.
 */
#error "Do not use HDF5 private definitions"
#endif
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								/*********************************************************************
 								*    Copyright 2018, UCAR/Unidata
 								*    See netcdf/COPYRIGHT file for copying and redistribution conditions.
 								* ********************************************************************/
 								/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 								 * Copyright by The HDF Group.                                               *
 								 * All rights reserved.                                                      *
 								 *                                                                           *
 								 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 								 * terms governing use, modification, and redistribution, is contained in    *
 								 * the COPYING file, which can be found at the root of the source code       *
 								 * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
 								 * If you do not have access to either file, you may request a copy from     *
 								 * help@hdfgroup.org.                                                        *
 								 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 								/* Programmer:  Dennis Heimbigner dmh@ucar.edu
 								 *
 								 * Purpose:  Access remote datasets using byte range requests.
 								 * Derived from the HDF5 H5FDstdio.c file.
 								 *
 								 * NOTE:    This driver is not as well tested as the standard SEC2 driver
 								 *          and is not intended for production use!
 								 */
 								#include "config.h"
 								#include <assert.h>
 								#include <errno.h>
 								#include <stdio.h>
 								#include <stdlib.h>
 								#include <string.h>
 								#include <sys/stat.h>
 								#include <hdf5.h>
 								#include <curl/curl.h>
 								#ifdef H5_HAVE_FLOCK
 								/* Needed for lock type definitions (e.g., LOCK_EX) */
 								#include <sys/file.h>
 								#endif /* H5_HAVE_FLOCK */
 								#ifdef H5_HAVE_UNISTD_H
 								#include <unistd.h>
 								#endif
-												It turns out that the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								/*
 								Define a simple #ifdef test for the version of H5FD_class_t we are using
 								*/
-												Enhance/Fix filter support

re: Discussion https://github.com/Unidata/netcdf-c/discussions/2214

The primary change is to support so-called "standard filters".
A standard filter is one that is defined by the following
netcdf-c API:
````
int nc_def_var_XXX(int ncid, int varid, size_t nparams, unsigned* params);
int nc_inq_var_XXXX(int ncid, int varid, int* usefilterp, unsigned* params);
````
So for example, zstandard would be a standard filter by defining
the functions *nc_def_var_zstandard* and *nc_inq_var_zstandard*.

In order to define these functions, we need a new dispatch function:
````
int nc_inq_filter_avail(int ncid, unsigned filterid);
````
This function, combined with the existing filter API can be used
to implement arbitrary standard filters using a simple code pattern.
Note that I would have preferred that this function return a list
of all available filters, but HDF5 does not support that functionality.

So this PR implements the dispatch function and implements
the following standard functions:
    + bzip2
    + zstandard
    + blosc
Specific test cases are also provided for HDF5 and NCZarr.
Over time, other specific standard filters will be defined.

## Primary Changes
* Add nc_inq_filter_avail() to netcdf-c API.
* Add standard filter implementations to test use of *nc_inq_filter_avail*.
* Bump the dispatch table version number and add to all the relevant
   dispatch tables (libsrc, libsrcp, etc).
* Create a program to invoke nc_inq_filter_avail so that it is accessible
  to shell scripts.
* Cleanup szip support to properly support szip
  when HDF5 is disabled. This involves detecting
  libsz separately from testing if HDF5 supports szip.
* Integrate shuffle and fletcher32 into the existing
  filter API. This means that, for example, nc_def_var_fletcher32
  is now a wrapper around nc_def_var_filter.
* Extend the Codec defaulting to allow multiple default shared libraries.

## Misc. Changes
* Modify configure.ac/CMakeLists.txt to look for the relevant
  libraries implementing standard filters.
* Modify libnetcdf.settings to list available standard filters
  (including deflate and szip).
* Add CMake test modules to locate libbz2 and libzstd.
* Cleanup the HDF5 memory manager function use in the plugins.
* remove unused file include//ncfilter.h
* remove tests for the HDF5 memory operations e.g. H5allocate_memory.
* Add flag to ncdump to force use of _Filter instead of _Deflate
  or _Shuffle or _Fletcher32. Used for testing.

											
										
										
											2022-03-15 02:39:37 +08:00
-												It turns out the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#if H5_VERS_MAJOR == 1
 								#if H5_VERS_MINOR < 10
 								#define H5FDCLASS1 1 /* HDF5 1.x older than 1.10: selects the alternate callback prototypes below */
 								#endif /* H5_VERS_MINOR < 10 */
 								#else /* H5_VERS_MAJOR != 1 */
 								#error "Cannot determine version of H5FD_class_t"
 								#endif /* H5_VERS_MAJOR == 1 */
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								#ifdef H5_HAVE_WIN32_API
 								/* The following two defines must be before any windows headers are included */
 								#define WIN32_LEAN_AND_MEAN    /* Exclude rarely-used stuff from Windows headers */
 								#define NOGDI                  /* Exclude Graphic Display Interface macros */
 								#include <windows.h>
 								#include <io.h>
 								#endif /* H5_HAVE_WIN32_API */
 								#include "netcdf.h"
 								#include "ncbytes.h"
-												This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".

The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.

More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).

WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:

Platform | Build System | S3 support
------------------------------------
Linux+gcc      | Automake     | yes
Linux+gcc      | CMake        | yes
Visual Studio  | CMake        | no

Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future.  Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.

In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*.  The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
   and the version bumped.
4. An overly complex set of structs was created to support funnelling
   all of the filterx operations thru a single dispatch
   "filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
   to nczarr.

Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
   -- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
   support zarr and to regularize the structure of the fragments
   section of a URL.

Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
   e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
   * Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
   and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.

Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.

											
										
										
											2020-06-29 08:02:47 +08:00
+								#include "nclist.h"
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								#include "nchttp.h"
 								#include "H5FDhttp.h"
 								/* Offset type for this driver; assumed to be signed by the overflow
 								 * macros below, which derive MAXADDR from its width. */
 								typedef off_t file_offset_t;
 								/* The driver identification number, initialized at runtime.
 								 * Not the same object as the class table H5FD_http_g. */
 								static hid_t H5FD_HTTP_g = 0;
 								/* Kinds of file operation; H5FD_http_t records the last one in 'op'.
 								 * Numbering starts at 0 and increases by one per enumerator. */
 								typedef enum {
 								    H5FD_HTTP_OP_UNKNOWN = 0,	/* 0 */
 								    H5FD_HTTP_OP_READ,		/* 1 */
 								    H5FD_HTTP_OP_WRITE,		/* 2 */
 								    H5FD_HTTP_OP_SEEK		/* 3 */
 								} H5FD_http_file_op;
 								/* The description of a file belonging to this driver. The 'eoa' and 'eof'
 								 * determine the amount of hdf5 address space in use and the high-water mark
 								 * of the file (the current size of the underlying Unix file). The 'pos'
 								 * value is used to eliminate file position updates when they would be a
 								 * no-op. Unfortunately we've found systems that use separate file position
 								 * indicators for reading and writing so the lseek can only be eliminated if
 								 * the current operation is the same as the previous operation.  When opening
 								 * a file the 'eof' will be set to the current file size, 'eoa' will be set
 								 * to zero, 'pos' will be set to HADDR_UNDEF (as it is when an error
 								 * occurs), and 'op' will be set to H5FD_HTTP_OP_UNKNOWN.
 								 */
 								typedef struct H5FD_http_t {
 								    H5FD_t      pub;            /* public stuff, must be first      */
 								    haddr_t     eoa;            /* end of allocated region          */
 								    haddr_t     eof;            /* end of file; current file size   */
 								    haddr_t     pos;            /* current file I/O position        */
 								    unsigned    write_access;   /* Flag to indicate the file was opened with write access */
-												Modify H5FDhttp.c to work with HDF5 1.14.0

re: https://github.com/Unidata/netcdf-c/issues/2614

Most of the changes are minor comment changes.
But the dispatch table for H5FD has changed, requiring changes
to H5FDhttp.c, which is derived from the HDF5 source file H5FDstdio.c.
The patch is to conditionally modify the dispatch table
to conform to the HDF5-1.14.0 version.
I was able to build and successfully test 1.14 for a reasonable
set of (non-parallel) ./configure options.

											
										
										
											2023-02-11 06:10:43 +08:00
+								    H5FD_http_file_op op;	/* last operation */
-												This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".

The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.

More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).

WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:

Platform | Build System | S3 support
------------------------------------
Linux+gcc      | Automake     | yes
Linux+gcc      | CMake        | yes
Visual Studio  | CMake        | no

Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future.  Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.

In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*.  The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
   and the version bumped.
4. An overly complex set of structs was created to support funnelling
   all of the filterx operations thru a single dispatch
   "filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
   to nczarr.

Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
   -- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
   support zarr and to regularize the structure of the fragments
   section of a URL.

Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
   e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
   * Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
   and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.

Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.

											
										
										
											2020-06-29 08:02:47 +08:00
+								    NC_HTTP_STATE*  state;       /* Curl handle + extra */
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    char*           url;        /* The URL (minus any fragment) for the dataset */
 								} H5FD_http_t;
 								/* Overflow checks for the addresses and sizes handed to this driver.
 								 * They assume that file_offset_t is signed and that haddr_t and
 								 * size_t are unsigned.
 								 *
 								 * MAXADDR:          2^(bits in file_offset_t - 1) - 1, as a haddr_t;
 								 *                   i.e. every bit set except the topmost one.
 								 *
 								 * ADDR_OVERFLOW:    True when the `haddr_t' address is HADDR_UNDEF
 								 *                   or has any bit set above MAXADDR.
 								 *
 								 * SIZE_OVERFLOW:    Non-zero when the `hsize_t' size has any bit set
 								 *                   above MAXADDR.
 								 *
 								 * REGION_OVERFLOW:  True when the address or the size overflows, when
 								 *                   address+size equals HADDR_UNDEF, or when
 								 *                   address+size viewed as a file_offset_t is less
 								 *                   than the address viewed the same way.
 								 *
 								 * The macros may evaluate their arguments more than once.
 								 */
 								#define MAXADDR \
 								    ((((haddr_t)1) << ((8 * sizeof(file_offset_t)) - 1)) - 1)
 								#define ADDR_OVERFLOW(A) \
 								    ((HADDR_UNDEF == (A)) || ((A) & ~((haddr_t)MAXADDR)))
 								#define SIZE_OVERFLOW(Z) \
 								    ((Z) & ~((hsize_t)MAXADDR))
 								#define REGION_OVERFLOW(A,Z) \
 								    (ADDR_OVERFLOW(A) || \
 								     SIZE_OVERFLOW(Z) || \
 								     (HADDR_UNDEF == ((A) + (Z))) || \
 								     ((file_offset_t)((A) + (Z)) < (file_offset_t)(A)))
 								/* Forward declarations of the driver callbacks whose signatures do
 								 * not depend on the H5FD_class_t version */
 								static H5FD_t *H5FD_http_open(const char *name,
 								                              unsigned flags,
 								                              hid_t fapl_id,
 								                              haddr_t maxaddr);
 								static herr_t H5FD_http_close(H5FD_t *lf);
 								static int H5FD_http_cmp(const H5FD_t *_f1,
 								                         const H5FD_t *_f2);
 								static herr_t H5FD_http_query(const H5FD_t *_f1,
 								                              unsigned long *flags);
 								static haddr_t H5FD_http_alloc(H5FD_t *_file,
 								                               H5FD_mem_t type,
 								                               hid_t dxpl_id,
 								                               hsize_t size);
 								static haddr_t H5FD_http_get_eoa(const H5FD_t *_file,
 								                                 H5FD_mem_t type);
 								static herr_t H5FD_http_set_eoa(H5FD_t *_file,
 								                                H5FD_mem_t type,
 								                                haddr_t addr);
 								static herr_t H5FD_http_get_handle(H5FD_t *_file,
 								                                   hid_t fapl,
 								                                   void **file_handle);
 								static herr_t H5FD_http_read(H5FD_t *lf,
 								                             H5FD_mem_t type,
 								                             hid_t fapl_id,
 								                             haddr_t addr,
 								                             size_t size,
 								                             void *buf);
 								static herr_t H5FD_http_write(H5FD_t *lf,
 								                              H5FD_mem_t type,
 								                              hid_t fapl_id,
 								                              haddr_t addr,
 								                              size_t size,
 								                              const void *buf);
-												It turns out the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
 								/* The H5FD_class_t structure has different versions */
 								#ifdef H5FDCLASS1
 								static haddr_t H5FD_http_get_eof(const H5FD_t *_file);
 								static herr_t H5FD_http_flush(H5FD_t *_file, hid_t dxpl_id, unsigned closing);
 								static herr_t H5FD_http_lock(H5FD_t *_file, unsigned char* old, unsigned lock_type, hbool_t last);
-												typeo4

											
										
										
											2019-01-03 11:53:44 +08:00
+								static herr_t H5FD_http_unlock(H5FD_t *file, unsigned char *oid, hbool_t last);
-												It turns out the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#else
 								static herr_t H5FD_http_term(void);
 								static haddr_t H5FD_http_get_eof(const H5FD_t *_file, H5FD_mem_t type);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								static herr_t H5FD_http_flush(H5FD_t *_file, hid_t dxpl_id, hbool_t closing);
 								static herr_t H5FD_http_lock(H5FD_t *_file, hbool_t rw);
 								static herr_t H5FD_http_unlock(H5FD_t *_file);
-												It turns out the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#endif
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
 								/* Beware, not same as H5FD_HTTP_g */
 								static const H5FD_class_t H5FD_http_g = {
-												Extend the dispatch table for H5FD back to version 1.13.2

re: Issue https://github.com/Unidata/netcdf-c/issues/2634
re: PR https://github.com/Unidata/netcdf-c/pull/2615
re: Issue https://github.com/Unidata/netcdf-c/issues/2614

It turns out that the H5FD table change identified in
issue https://github.com/Unidata/netcdf-c/issues/2614
actually occurred in HDF5 version 1.13.2.
Since we do not test with 1.13.x, we did not catch this.

											
										
										
											2023-02-23 02:15:43 +08:00
+								#if H5_VERSION_GE(1,13,2)
-												Modify H5FDhttp.c to work with HDF5 1.14.0

re: https://github.com/Unidata/netcdf-c/issues/2614

Most of the changes are minor comment changes.
But the dispatch table for H5FD has changed, requiring changes
to H5FDhttp.c, which is derived from the HDF5 source file H5FDstdio.c.
The patch is to conditionally modify the dispatch table
to conform to the HDF5-1.14.0 version.
I was able to build and successfully test 1.14 for a reasonable
set of (non-parallel) ./configure options.

											
										
										
											2023-02-11 06:10:43 +08:00
+								    H5FD_CLASS_VERSION,		/* struct version  */
 								    H5_VFD_HTTP,		/* value           */
 								#endif
 								    "http",			/* name         */
 								    MAXADDR,			/* maxaddr      */
 								    H5F_CLOSE_WEAK,		/* fc_degree    */
-												Another typo (sigh\!)

											
										
										
											2019-01-03 07:48:11 +08:00
+								#ifndef H5FDCLASS1
-												Modify H5FDhttp.c to work with HDF5 1.14.0

re: https://github.com/Unidata/netcdf-c/issues/2614

Most of the changes are minor comment changes.
But the dispatch table for H5FD has changed, requiring changes
to H5FDhttp.c, which is derived from the HDF5 source file H5FDstdio.c.
The patch is to conditionally modify the dispatch table
to conform to the HDF5-1.14.0 version.
I was able to build and successfully test 1.14 for a reasonable
set of (non-parallel) ./configure options.

											
										
										
											2023-02-11 06:10:43 +08:00
+								    H5FD_http_term,		/* terminate    */
 								#endif
 								    NULL,			/* sb_size      */
 								    NULL,			/* sb_encode    */
 								    NULL,			/* sb_decode    */
 ,				/* fapl_size    */
 								    NULL,			/* fapl_get     */
 								    NULL,			/* fapl_copy    */
 								    NULL,			/* fapl_free    */
 ,				/* dxpl_size    */
 								    NULL,			/* dxpl_copy    */
 								    NULL,			/* dxpl_free    */
 								    H5FD_http_open,		/* open         */
 								    H5FD_http_close,		/* close        */
 								    H5FD_http_cmp,		/* cmp          */
 								    H5FD_http_query,		/* query        */
 								    NULL,			/* get_type_map */
 								    H5FD_http_alloc,		/* alloc        */
 								    NULL,			/* free         */
 								    H5FD_http_get_eoa,		/* get_eoa      */
 								    H5FD_http_set_eoa,		/* set_eoa      */
 								    H5FD_http_get_eof,		/* get_eof      */
 								    H5FD_http_get_handle,	/* get_handle   */
 								    H5FD_http_read,		/* read         */
 								    H5FD_http_write,		/* write        */
-												Extend the dispatch table for H5FD back to version 1.13.2

re: Issue https://github.com/Unidata/netcdf-c/issues/2634
re: PR https://github.com/Unidata/netcdf-c/pull/2615
re: Issue https://github.com/Unidata/netcdf-c/issues/2614

It turns out that the H5FD table change identified in
issue https://github.com/Unidata/netcdf-c/issues/2614
actually occurred in HDF5 version 1.13.2.
Since we do not test with 1.13.x, we did not catch this.

											
										
										
											2023-02-23 02:15:43 +08:00
+								#if H5_VERSION_GE(1,13,2)
-												Modify H5FDhttp.c to work with HDF5 1.14.0

re: https://github.com/Unidata/netcdf-c/issues/2614

Most of the changes are minor comment changes.
But the dispatch table for H5FD has changed, requiring changes
to H5FDhttp.c, which is derived from the HDF5 source file H5FDstdio.c.
The patch is to conditionally modify the dispatch table
to conform to the HDF5-1.14.0 version.
I was able to build and successfully test 1.14 for a reasonable
set of (non-parallel) ./configure options.

											
										
										
											2023-02-11 06:10:43 +08:00
+								    NULL,			/* read_vector     */
 								    NULL,			/* write_vector    */
 								    NULL,			/* read_selection  */
 								    NULL,			/* write_selection */
 								#endif
 								    H5FD_http_flush,		/* flush        */
 								    NULL,			/* truncate     */
 								    H5FD_http_lock,		/* lock         */
 								    H5FD_http_unlock,		/* unlock       */
-												Extend the dispatch table for H5FD back to version 1.13.2

re: Issue https://github.com/Unidata/netcdf-c/issues/2634
re: PR https://github.com/Unidata/netcdf-c/pull/2615
re: Issue https://github.com/Unidata/netcdf-c/issues/2614

It turns out that the H5FD table change identified in
issue https://github.com/Unidata/netcdf-c/issues/2614
actually occurred in HDF5 version 1.13.2.
Since we do not test with 1.13.x, we did not catch this.

											
										
										
											2023-02-23 02:15:43 +08:00
+								#if H5_VERSION_GE(1,13,2)
-												Modify H5FDhttp.c to work with HDF5 1.14.0

re: https://github.com/Unidata/netcdf-c/issues/2614

Most of the changes are minor comment changes.
But the dispatch table for H5FD has changed, requiring changes
to H5FDhttp.c, which is derived from the HDF5 source file H5FDstdio.c.
The patch is to conditionally modify the dispatch table
to conform to the HDF5-1.14.0 version.
I was able to build and successfully test 1.14 for a reasonable
set of (non-parallel) ./configure options.

											
										
										
											2023-02-11 06:10:43 +08:00
+								    NULL,			/* del          */
 								    NULL,			/* ctl	        */
-												It turns out that the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#endif
-												Modify H5FDhttp.c to work with HDF5 1.14.0

re: https://github.com/Unidata/netcdf-c/issues/2614

Most of the changes are minor comment changes.
But the dispatch table for H5FD has changed, requiring changes
to H5FDhttp.c, which is derived from the HDF5 source file H5FDstdio.c.
The patch is to conditionally modify the dispatch table
to conform to the HDF5-1.14.0 version.
I was able to build and successfully test 1.14 for a reasonable
set of (non-parallel) ./configure options.

											
										
										
											2023-02-11 06:10:43 +08:00
+								    H5FD_FLMAP_DICHOTOMY	/* fl_map       */
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								};
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_init
 								 *
 								 * Purpose:  Register this driver with the library, unless the cached
 								 *    driver ID already identifies a VFL driver.
 								 *
 								 * Return:  Success:  The driver ID for the driver.
 								 *
 								 *    Failure:  Negative.
 								 *
 								 * Programmer:  Robb Matzke
 								 *              Thursday, July 29, 1999
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								EXTERNL hid_t
 								H5FD_http_init(void)
 								{
 								    /* Start from an empty error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    /* The cached ID is already a VFL driver ID: hand it back unchanged */
 								    if (H5Iget_type(H5FD_HTTP_g) == H5I_VFL)
 								        return H5FD_HTTP_g;
 								    /* Otherwise register the class table and cache whatever ID comes back */
 								    H5FD_HTTP_g = H5FDregister(&H5FD_http_g);
 								    return H5FD_HTTP_g;
 								} /* end H5FD_http_init() */
-												Add H5FD_http_finalize function and call on hdf5 finalize

Fixes Unidata#2617

											
										
										
											2023-12-13 07:16:31 +08:00
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_finalize
 								 *
 								 * Purpose:  Free this driver by unregistering the driver with the
 								 *    library, then reset the cached driver ID to 0.
 								 *
 								 * Returns:     Non-negative on success or negative on failure
 								 *
 								 * Programmer:  John Donoghue
 								 *              Tuesday, December 12, 2023
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								EXTERNL hid_t
 								H5FD_http_finalize(void)
 								{
 								    hid_t status = 0;
 								    /* A non-positive value is not a valid HDF5 identifier, so in that
 								     * case there is nothing to unregister. */
 								    if (H5FD_HTTP_g > 0) {
 								        /* Report an unregister failure to the caller instead of hiding it */
 								        if (H5FDunregister(H5FD_HTTP_g) < 0)
 								            status = -1;
 								    }
 								    /* Reset VFL ID, even after a failure, so a stale ID is never reused */
 								    H5FD_HTTP_g = 0;
 								    return status;
 								} /* end H5FD_http_finalize() */
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
 								/*---------------------------------------------------------------------------
 								 * Function:  H5FD_http_term
 								 *
 								 * Purpose:  Shut down the VFD
 								 *
 								 * Returns:     Non-negative on success or negative on failure
 								 *
 								 * Programmer:  Quincey Koziol
 								 *              Friday, Jan 30, 2004
 								 *
 								 *---------------------------------------------------------------------------
 								 */
-												It turns out that the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#ifndef H5FDCLASS1
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								static herr_t
 								H5FD_http_term(void)
 								{
 								    /* No teardown work is performed here; success is always reported */
 								    const herr_t status = 0;
 								    return status;
 								} /* end H5FD_http_term() */
-												It turns out that the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#endif
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
 								/*-------------------------------------------------------------------------
 								 * Function:  H5Pset_fapl_http
 								 *
 								 * Purpose:  Modify the file access property list to use the H5FD_HTTP
 								 *    driver defined in this source file.  There are no driver
 								 *    specific properties.
 								 *
 								 * Return:  Non-negative on success/Negative on failure
 								 *
 								 * Programmer:  Robb Matzke
 								 *    Thursday, February 19, 1998
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								EXTERNL herr_t
 								H5Pset_fapl_http(hid_t fapl_id)
 								{
 								    static const char *func = "H5FDset_fapl_http";  /*for error reporting*/
 								    /*NO TRACE*/
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    if(0 == H5Pisa_class(fapl_id, H5P_FILE_ACCESS))
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								        H5Epush_ret(func, H5E_ERR_CLS, H5E_PLIST, H5E_BADTYPE, "not a file access property list", -1);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
 								    return H5Pset_driver(fapl_id, H5FD_HTTP, NULL);
 								} /* end H5Pset_fapl_http() */
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_open
 								 *
 								 * Purpose:  Opens a remote Object as an HDF5 file.
 								 *
 								 * Errors:
 								 *  IO  CANTOPENFILE    File doesn't exist and CREAT wasn't
 								 *                      specified.
 								 *
 								 * Return:
 								 *      Success:    A pointer to a new file data structure. The
 								 *                  public fields will be initialized by the
 								 *                  caller, which is always H5FD_open().
 								 *
 								 *      Failure:    NULL
 								 *
 								 * Programmer:  Dennis Heimbigner
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static H5FD_t *
 								H5FD_http_open( const char *name, unsigned flags, hid_t /*UNUSED*/ fapl_id,
 								    haddr_t maxaddr)
 								{
 								    unsigned            write_access = 0;           /* File opened with write access? */
-												This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".

The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.

More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).

WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:

Platform | Build System | S3 support
------------------------------------
Linux+gcc      | Automake     | yes
Linux+gcc      | CMake        | yes
Visual Studio  | CMake        | no

Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future.  Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.

In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*.  The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
   and the version bumped.
4. An overly complex set of structs was created to support funnelling
   all of the filterx operations thru a single dispatch
   "filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
   to nczarr.

Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
   -- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
   support zarr and to regularize the structure of the fragments
   section of a URL.

Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
   e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
   * Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
   and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.

Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.

											
										
										
											2020-06-29 08:02:47 +08:00
+								    H5FD_http_t *file = NULL;
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    static const char   *func = "H5FD_http_open";  /* Function Name for error reporting */
 								    long long len = -1;
 								    int ncstat = NC_NOERR;
-												This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".

The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.

More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).

WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:

Platform | Build System | S3 support
------------------------------------
Linux+gcc      | Automake     | yes
Linux+gcc      | CMake        | yes
Visual Studio  | CMake        | no

Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future.  Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.

In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*.  The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
   and the version bumped.
4. An overly complex set of structs was created to support funnelling
   all of the filterx operations thru a single dispatch
   "filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
   to nczarr.

Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
   -- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
   support zarr and to regularize the structure of the fragments
   section of a URL.

Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
   e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
   * Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
   and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.

Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.

											
										
										
											2020-06-29 08:02:47 +08:00
+								    NC_HTTP_STATE* state = NULL;
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
 								    /* Sanity check on file offsets */
 								    assert(sizeof(file_offset_t) >= sizeof(size_t));
 								    /* Quiet compiler */
 								    fapl_id = fapl_id;
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    /* Check arguments */
 								    if (!name || !*name)
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								        H5Epush_ret(func, H5E_ERR_CLS, H5E_ARGS, H5E_BADVALUE, "invalid URL", NULL);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    if (0 == maxaddr || HADDR_UNDEF == maxaddr)
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								        H5Epush_ret(func, H5E_ERR_CLS, H5E_ARGS, H5E_BADRANGE, "bogus maxaddr", NULL);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    if (ADDR_OVERFLOW(maxaddr))
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								        H5Epush_ret(func, H5E_ERR_CLS, H5E_ARGS, H5E_OVERFLOW, "maxaddr too large", NULL);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
 								    /* Always read-only */
 								    write_access = 0;
-												Significantly Improve Amazon S3 Cloud Storage Support

## S3 Related Fixes

* Add comprehensive support for specifying AWS profiles to provide access credentials.
* Parse the files "~/.aws/config" and "~/.aws/credentials" to provide credentials for the HDF5 ROS3 driver and to locate the default region.
* Add a function to obtain the currently active S3 credentials. The search rules are defined in docs/nczarr.md.
* Provide documentation for the new features.
* Modify the struct NCauth (in include/ncauth.h) to replace specific S3 credentials with a profile name.
* Add a unit test to test the operation of profile and credentials management.
* Add support for URLS of the form "s3://<bucket>/<key>"; this requires obtaining a default region.
* Allows the specification of profile and/or region in a URL of the form "#mode=nczarr,...&aws.region=...&aws.profile=..."

## Misc. Fixes

* Move the ezxml code to libdispatch so that it can be used both by DAP4 and nczarr.
* Modify nclist to provide a deep clone operation.
* Modify ncuri to provide a deep clone operation.
* Modify the .rc file format to allow the specification of a path to be tested when looking for an entry in the .rc file.
* Ensure that the NC_rcload function is called.
* Modify nchttp to support setting request headers.

											
										
										
											2021-09-28 08:36:33 +08:00
+								   /* Open file in read-only mode, to check for existence  and get length */
-												Improve S3 Documentation and Support

## Improvements to S3 Documentation
* Create a new document *quickstart_paths.md* that gives a summary of the legal path formats used by netcdf-c. This includes both file paths and URL paths.
* Modify *nczarr.md* to remove most of the S3 related text.
* Move the S3 text from *nczarr.md* to a new document *cloud.md*.
* Add some S3-related text to the *byterange.md* document.

Hopefully, this will make it easier for users to find the information they want.

## Rebuild NCZarr Testing
In order to avoid problems with running make check in parallel, two changes were made:
1. The *nczarr_test* test system was rebuilt. Now, for each test,
any generated files are kept in a test-specific directory, isolated
from all other test executions.
2. Similarly, since the S3 test bucket is shared, any generated S3 objects
are isolated using a test-specific key path.

## Other S3 Related Changes
* Add code to ensure that files created on S3 are reclaimed at end of testing.
* Used the bash "trap" command to ensure S3 cleanup even if the test fails.
* Cleanup the S3 related configure.ac flag set since S3 is used in several places. So now one should use the option *--enable-s3* instead of *--enable-nczarr-s3*, although the latter is still kept as a deprecated alias for the former.
* Get some of the github actions yml to work with S3; required fixing various test scripts adding a secret to access the Unidata S3 bucket.
* Cleanup S3 portion of libnetcdf.settings.in and netcdf_meta.h.in and test_common.in.
* Merge partial S3 support into dhttp.c.
* Create an experimental s3 access library especially for use with Windows. It is enabled by using the options *--enable-s3-internal* (automake) or *-DENABLE_S3_INTERNAL=ON* (CMake). Also add a unit-test for it.
* Move some definitions from ncrc.h to ncs3sdk.h

## Other Changes
* Provide a default implementation of strlcpy and move this and similar defaults into *dmissing.c*.

											
										
										
											2023-04-26 07:15:06 +08:00
+								    if((ncstat = nc_http_open(name,&state))) {
-												Significantly Improve Amazon S3 Cloud Storage Support

## S3 Related Fixes

* Add comprehensive support for specifying AWS profiles to provide access credentials.
* Parse the files "~/.aws/config" and "~/.aws/credentials" to provide credentials for the HDF5 ROS3 driver and to locate the default region.
* Add a function to obtain the currently active S3 credentials. The search rules are defined in docs/nczarr.md.
* Provide documentation for the new features.
* Modify the struct NCauth (in include/ncauth.h) to replace specific S3 credentials with a profile name.
* Add a unit test to test the operation of profile and credentials management.
* Add support for URLS of the form "s3://<bucket>/<key>"; this requires obtaining a default region.
* Allows the specification of profile and/or region in a URL of the form "#mode=nczarr,...&aws.region=...&aws.profile=..."

## Misc. Fixes

* Move the ezxml code to libdispatch so that it can be used both by DAP4 and nczarr.
* Modify nclist to provide a deep clone operation.
* Modify ncuri to provide a deep clone operation.
* Modify the .rc file format to allow the specification of a path to be tested when looking for an entry in the .rc file.
* Ensure that the NC_rcload function is called.
* Modify nchttp to support setting request headers.

											
										
										
											2021-09-28 08:36:33 +08:00
+								        H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_CANTOPENFILE, "cannot access object", NULL);
 								    }
-												Improve S3 Documentation and Support

## Improvements to S3 Documentation
* Create a new document *quickstart_paths.md* that gives a summary of the legal path formats used by netcdf-c. This includes both file paths and URL paths.
* Modify *nczarr.md* to remove most of the S3 related text.
* Move the S3 text from *nczarr.md* to a new document *cloud.md*.
* Add some S3-related text to the *byterange.md* document.

Hopefully, this will make it easier for users to find the information they want.

## Rebuild NCZarr Testing
In order to avoid problems with running make check in parallel, two changes were made:
1. The *nczarr_test* test system was rebuilt. Now, for each test,
any generated files are kept in a test-specific directory, isolated
from all other test executions.
2. Similarly, since the S3 test bucket is shared, any generated S3 objects
are isolated using a test-specific key path.

## Other S3 Related Changes
* Add code to ensure that files created on S3 are reclaimed at end of testing.
* Used the bash "trap" command to ensure S3 cleanup even if the test fails.
* Cleanup the S3 related configure.ac flag set since S3 is used in several places. So now one should use the option *--enable-s3* instead of *--enable-nczarr-s3*, although the latter is still kept as a deprecated alias for the former.
* Get some of the github actions yml to work with S3; required fixing various test scripts adding a secret to access the Unidata S3 bucket.
* Cleanup S3 portion of libnetcdf.settings.in and netcdf_meta.h.in and test_common.in.
* Merge partial S3 support into dhttp.c.
* Create an experimental s3 access library especially for use with Windows. It is enabled by using the options *--enable-s3-internal* (automake) or *-DENABLE_S3_INTERNAL=ON* (CMake). Also add a unit-test for it.
* Move some definitions from ncrc.h to ncs3sdk.h

## Other Changes
* Provide a default implementation of strlcpy and move this and similar defaults into *dmissing.c*.

											
										
										
											2023-04-26 07:15:06 +08:00
+								    if((ncstat = nc_http_size(state,&len))) {
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								        H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_CANTOPENFILE, "cannot access object", NULL);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    }
 								    /* Build the return value */
 								    if(NULL == (file = (H5FD_http_t *)H5allocate_memory(sizeof(H5FD_http_t),0))) {
-												This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".

The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.

More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).

WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:

Platform | Build System | S3 support
------------------------------------
Linux+gcc      | Automake     | yes
Linux+gcc      | CMake        | yes
Visual Studio  | CMake        | no

Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future.  Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.

In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*.  The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
   and the version bumped.
4. An overly complex set of structs was created to support funnelling
   all of the filterx operations thru a single dispatch
   "filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
   to nczarr.

Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
   -- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
   support zarr and to regularize the structure of the fragments
   section of a URL.

Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
   e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
   * Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
   and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.

Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.

											
										
										
											2020-06-29 08:02:47 +08:00
+									nc_http_close(state);
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								        H5Epush_ret(func, H5E_ERR_CLS, H5E_RESOURCE, H5E_NOSPACE, "memory allocation failed", NULL);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    } /* end if */
 								    memset(file,0,sizeof(H5FD_http_t));
 								    file->op = H5FD_HTTP_OP_SEEK;
 								    file->pos = HADDR_UNDEF;
 								    file->write_access = write_access;    /* Note the write_access for later */
 								    file->eof = (haddr_t)len;
-												This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".

The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.

More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).

WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:

Platform | Build System | S3 support
------------------------------------
Linux+gcc      | Automake     | yes
Linux+gcc      | CMake        | yes
Visual Studio  | CMake        | no

Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future.  Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.

In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*.  The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
   and the version bumped.
4. An overly complex set of structs was created to support funnelling
   all of the filterx operations thru a single dispatch
   "filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
   to nczarr.

Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
   -- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
   support zarr and to regularize the structure of the fragments
   section of a URL.

Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
   e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
   * Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
   and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.

Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.

											
										
										
											2020-06-29 08:02:47 +08:00
+								    file->state = state; state = NULL;
-												Fix bug in the default HDF5 byte-range reader

re: https://github.com/Unidata/netcdf-c/issues/2122

There was a string allocation error in H5FDhttp.c

											
										
										
											2021-10-18 03:55:03 +08:00
+								    file->url = H5allocate_memory(strlen(name)+1,0);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    if(file->url == NULL) {
-												This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".

The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.

More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).

WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:

Platform | Build System | S3 support
------------------------------------
Linux+gcc      | Automake     | yes
Linux+gcc      | CMake        | yes
Visual Studio  | CMake        | no

Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future.  Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.

In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*.  The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
   and the version bumped.
4. An overly complex set of structs was created to support funnelling
   all of the filterx operations thru a single dispatch
   "filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
   to nczarr.

Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
   -- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
   support zarr and to regularize the structure of the fragments
   section of a URL.

Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
   e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
   * Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
   and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.

Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.

											
										
										
											2020-06-29 08:02:47 +08:00
+									nc_http_close(state);
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older version of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								        H5Epush_ret(func, H5E_ERR_CLS, H5E_RESOURCE, H5E_NOSPACE, "memory allocation failed", NULL);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    }
 								    memcpy(file->url,name,strlen(name)+1);
 								    return((H5FD_t*)file);
 								} /* end H5FD_HTTP_OPen() */
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_close
 								 *
 								 * Purpose:  Closes a file.
 								 *
 								 * Errors:
 								 *    IO    CLOSEERROR  Fclose failed.
 								 *
 								 * Return:  Non-negative on success/Negative on failure
 								 *
 								 * Programmer:  Dennis Heimbigner
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static herr_t
 								H5FD_http_close(H5FD_t *_file)
 								{
 								    H5FD_http_t  *file = (H5FD_http_t*)_file;
 								#if 0
 								    static const char *func = "H5FD_http_close";  /* Function Name for error reporting */
 								#endif
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    /* Close the underlying curl handle*/
-												This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".

The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.

More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).

WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:

Platform | Build System | S3 support
------------------------------------
Linux+gcc      | Automake     | yes
Linux+gcc      | CMake        | yes
Visual Studio  | CMake        | no

Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future.  Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.

In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*.  The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
   and the version bumped.
4. An overly complex set of structs was created to support funnelling
   all of the filterx operations thru a single dispatch
   "filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
   to nczarr.

Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
   -- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
   support zarr and to regularize the structure of the fragments
   section of a URL.

Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
   e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
   * Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
   and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.

Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.

											
										
										
											2020-06-29 08:02:47 +08:00
+								    if(file->state) nc_http_close(file->state);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    if(file->url) H5free_memory(file->url);
 								    H5free_memory(file);
 								    return 0;
 								} /* end H5FD_http_close() */
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_cmp
 								 *
 								 * Purpose:  Compares two files belonging to this driver using an
 								 *    arbitrary (but consistent) ordering.
 								 *
 								 * Return:
 								 *      Success:    A value like strcmp()
 								 *
 								 *      Failure:    never fails (arguments were checked by the caller).
 								 *
 								 * Programmer:  Robb Matzke
 								 *              Thursday, July 29, 1999
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static int
 								H5FD_http_cmp(const H5FD_t *_f1, const H5FD_t *_f2)
 								{
 								    const H5FD_http_t  *lhs = (const H5FD_http_t*)_f1;
 								    const H5FD_http_t  *rhs = (const H5FD_http_t*)_f2;
 								    int                 order;
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    /* Two files are ordered by comparing their URL strings */
 								    order = strcmp(lhs->url, rhs->url);
 								    /* Collapse the strcmp() result to exactly -1, 0, or 1 */
 								    return (order > 0) - (order < 0);
 								} /* H5FD_http_cmp() */
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_query
 								 *
 								 * Purpose:  Set the flags that this VFL driver is capable of supporting.
 								 *              (listed in H5FDpublic.h)
 								 *
 								 * Return:  Success:  non-negative
 								 *
 								 *    Failure:  negative
 								 *
 								 * Programmer:  Quincey Koziol
 								 *              Friday, August 25, 2000
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static herr_t
 								H5FD_http_query(const H5FD_t *_f, unsigned long /*OUT*/ *flags)
 								{
 								    /* Quiet the compiler */
 								    _f=_f;
 								    /* Set the VFL feature flags that this driver supports.
 								     *
 								     * Note that this VFD does not support SWMR due to the unpredictable
 								     * nature of the buffering layer.
 								     */
 								    if(flags) {
 								        *flags = 0;
 								        *flags |= H5FD_FEAT_AGGREGATE_METADATA;     /* OK to aggregate metadata allocations                             */
 								        *flags |= H5FD_FEAT_ACCUMULATE_METADATA;    /* OK to accumulate metadata for faster writes                      */
 								        *flags |= H5FD_FEAT_DATA_SIEVE;             /* OK to perform data sieving for faster raw data reads & writes    */
 								        *flags |= H5FD_FEAT_AGGREGATE_SMALLDATA;    /* OK to aggregate "small" raw data allocations                     */
-												typos3

											
										
										
											2019-01-03 11:31:06 +08:00
+								#ifndef H5FDCLASS1
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								        *flags |= H5FD_FEAT_DEFAULT_VFD_COMPATIBLE; /* VFD creates a file which can be opened with the default VFD      */
-												typos3

											
										
										
											2019-01-03 11:31:06 +08:00
+								#endif
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    }
 								    return 0;
 								} /* end H5FD_http_query() */
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_alloc
 								 *
 								 * Purpose:     Reserves SIZE bytes of address space starting at the
 								 *              current end-of-address (EOA) marker and moves the marker
 								 *              past the reserved range. Only the EOA field of the file
 								 *              structure is updated.
 								 *
 								 * Return:
 								 *      Success:    Address of the reserved range (the EOA value on entry)
 								 *
 								 *      Failure:    This implementation has no failure path
 								 *
 								 * Programmer:  Raymond Lu
 								 *              30 March 2007
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static haddr_t
 								H5FD_http_alloc(H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type, hid_t /*UNUSED*/ dxpl_id, hsize_t size)
 								{
 								    H5FD_http_t    *http = (H5FD_http_t*)_file;
 								    haddr_t         start;
 								    /* Neither the memory type nor the transfer property list is consulted */
 								    (void)type;
 								    (void)dxpl_id;
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    /* The new block begins where the address space currently ends */
 								    start = http->eoa;
 								    /* Advance the end-of-address marker past the new block */
 								    http->eoa = start + size;
 								    return start;
 								} /* end H5FD_http_alloc() */
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_get_eoa
 								 *
 								 * Purpose:  Gets the end-of-address marker for the file. The EOA marker
 								 *           is the first address past the last byte allocated in the
 								 *           format address space.
 								 *
 								 * Return:  Success:  The end-of-address marker.
 								 *
 								 *    Failure:  HADDR_UNDEF
 								 *
 								 * Programmer:  Robb Matzke
 								 *              Monday, August  2, 1999
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static haddr_t
 								H5FD_http_get_eoa(const H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type)
 								{
 								    const H5FD_http_t *http = (const H5FD_http_t *)_file;
 								    /* The memory type does not influence the result */
 								    (void)type;
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    /* Report the end-of-address marker stored in the file structure */
 								    return http->eoa;
 								} /* end H5FD_http_get_eoa() */
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_set_eoa
 								 *
 								 * Purpose:  Set the end-of-address marker for the file. This function is
 								 *    called shortly after an existing HDF5 file is opened in order
 								 *    to tell the driver where the end of the HDF5 data is located.
 								 *
 								 * Return:  Success:  0
 								 *
 								 *    Failure:  Does not fail
 								 *
 								 * Programmer:  Robb Matzke
 								 *              Thursday, July 29, 1999
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static herr_t
 								H5FD_http_set_eoa(H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type, haddr_t addr)
 								{
 								    H5FD_http_t  *http = (H5FD_http_t*)_file;
 								    /* The memory type is not used when recording the marker */
 								    (void)type;
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    /* Record the caller-supplied end-of-address marker */
 								    http->eoa = addr;
 								    return 0;
 								} /* end H5FD_http_set_eoa() */
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_get_eof
 								 *
 								 * Purpose:  Returns the end-of-file marker, which is the greater of
 								 *    either the Unix end-of-file or the HDF5 end-of-address
 								 *    markers.
 								 *
 								 * Return:  Success:  End of file address, the first address past
 								 *        the end of the "file", either the Unix file
 								 *        or the HDF5 file.
 								 *
 								 *    Failure:  HADDR_UNDEF
 								 *
 								 * Programmer:  Robb Matzke
 								 *              Thursday, July 29, 1999
 								 *
 								 *-------------------------------------------------------------------------
 								 */
-												It turns out the the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								static haddr_t
-												It turns out the the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#ifdef H5FDCLASS1
 								H5FD_http_get_eof(const H5FD_t *_file)
 								#else
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								H5FD_http_get_eof(const H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type)
-												It turns out the the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#endif
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								{
 								    const H5FD_http_t  *file = (const H5FD_http_t *)_file;
-												It turns out the the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#ifndef H5FDCLASS1
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    /* Quiet the compiler */
 								    type = type;
-												It turns out the the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#endif
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    return(file->eof);
 								} /* end H5FD_http_get_eof() */
 								/*-------------------------------------------------------------------------
 								 * Function:       H5FD_http_get_handle
 								 *
 								 * Purpose:        Returns the file handle of file driver.
 								 *
 								 * Returns:        Non-negative if succeed or negative if fails.
 								 *
 								 * Programmer:     Raymond Lu
 								 *                 Sept. 16, 2002
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static herr_t
 								H5FD_http_get_handle(H5FD_t *_file, hid_t /*UNUSED*/ fapl, void **file_handle)
 								{
 								    H5FD_http_t       *file = (H5FD_http_t *)_file;
 								    static const char  *func = "H5FD_http_get_handle";  /* Function Name for error reporting */
 								    /* Quiet the compiler */
 								    fapl = fapl;
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
-												This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".

The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.

More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).

WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:

Platform | Build System | S3 support
------------------------------------
Linux+gcc      | Automake     | yes
Linux+gcc      | CMake        | yes
Visual Studio  | CMake        | no

Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future.  Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.

In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*.  The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
   and the version bumped.
4. An overly complex set of structs was created to support funnelling
   all of the filterx operations thru a single dispatch
   "filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
   to nczarr.

Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
   -- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
   support zarr and to regularize the structure of the fragments
   section of a URL.

Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
   e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
   * Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
   and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.

Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.

											
										
										
											2020-06-29 08:02:47 +08:00
+								    *file_handle = file->state;
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    if(*file_handle == NULL)
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								        H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_WRITEERROR, "get handle failed", -1);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
 								    return 0;
 								} /* end H5FD_http_get_handle() */
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_read
 								 *
 								 * Purpose:  Reads SIZE bytes beginning at address ADDR in file LF and
 								 *    places them in buffer BUF.  Reading past the logical or
 								 *    physical end of file returns zeros instead of failing.
 								 *
 								 * Errors:
 								 *    IO    READERROR  fread failed.
 								 *    IO    SEEKERROR  fseek failed.
 								 *
 								 * Return:  Non-negative on success/Negative on failure
 								 *
 								 * Programmer:  Robb Matzke
 								 *    Wednesday, October 22, 1997
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static herr_t
 								H5FD_http_read(H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type, hid_t /*UNUSED*/ dxpl_id,
 								    haddr_t addr, size_t size, void /*OUT*/ *buf)
 								{
 								    H5FD_http_t    *file = (H5FD_http_t*)_file;
 								    static const char *func = "H5FD_http_read";  /* Function Name for error reporting */
 								    int ncstat = NC_NOERR;
 								    /* Quiet the compiler */
 								    type = type;
 								    dxpl_id = dxpl_id;
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    /* Check for overflow */
 								    if (HADDR_UNDEF==addr)
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								        H5Epush_ret (func, H5E_ERR_CLS, H5E_IO, H5E_OVERFLOW, "file address overflowed", -1);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    if (REGION_OVERFLOW(addr, size))
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								        H5Epush_ret (func, H5E_ERR_CLS, H5E_IO, H5E_OVERFLOW, "file address overflowed", -1);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
 								    /* Check easy cases */
 								    if (0 == size)
 								        return 0;
 								    if ((haddr_t)addr >= file->eof) {
 								        memset(buf, 0, size);
 								        return 0;
 								    }
 								    /* Seek to the correct file position. */
 								    if (!(file->op == H5FD_HTTP_OP_READ || file->op == H5FD_HTTP_OP_SEEK) ||
 								            file->pos != addr) {
 								#if 0
 								        if (file_fseek(file->fp, (file_offset_t)addr, SEEK_SET) < 0) {
 								            file->op = H5FD_HTTP_OP_UNKNOWN;
 								            file->pos = HADDR_UNDEF;
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								            H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_SEEKERROR, "fseek failed", -1);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								        }
 								#endif
 								        file->pos = addr;
 								    }
 								    /* Read zeros past the logical end of file (physical is handled below) */
 								    if (addr + size > file->eof) {
 								        size_t nbytes = (size_t) (addr + size - file->eof);
 								        memset((unsigned char *)buf + size - nbytes, 0, nbytes);
 								        size -= nbytes;
 								    }
 								    {
 									NCbytes* bbuf = ncbytesnew();
-												Improve S3 Documentation and Support

## Improvements to S3 Documentation
* Create a new document *quickstart_paths.md* that gives a summary of the legal path formats used by netcdf-c. This includes both file paths and URL paths.
* Modify *nczarr.md* to remove most of the S3 related text.
* Move the S3 text from *nczarr.md* to a new document *cloud.md*.
* Add some S3-related text to the *byterange.md* document.

Hopefully, this will make it easier for users to find the information they want.

## Rebuild NCZarr Testing
In order to avoid problems with running make check in parallel, two changes were made:
1. The *nczarr_test* test system was rebuilt. Now, for each test,
any generated files are kept in a test-specific directory, isolated
from all other test executions.
2. Similarly, since the S3 test bucket is shared, any generated S3 objects
are isolated using a test-specific key path.

## Other S3 Related Changes
* Add code to ensure that files created on S3 are reclaimed at end of testing.
* Used the bash "trap" command to ensure S3 cleanup even if the test fails.
* Cleanup the S3 related configure.ac flag set since S3 is used in several places. So now one should use the option *--enable-s3* instead of *--enable-nczarr-s3*, although the latter is still kept as a deprecated alias for the former.
* Get some of the github actions yml to work with S3; required fixing various test scripts adding a secret to access the Unidata S3 bucket.
* Cleanup S3 portion of libnetcdf.settings.in and netcdf_meta.h.in and test_common.in.
* Merge partial S3 support into dhttp.c.
* Create an experimental s3 access library especially for use with Windows. It is enabled by using the options *--enable-s3-internal* (automake) or *-DENABLE_S3_INTERNAL=ON* (CMake). Also add a unit-test for it.
* Move some definitions from ncrc.h to ncs3sdk.h

## Other Changes
* Provide a default implementation of strlcpy and move this and similar defaults into *dmissing.c*.

											
										
										
											2023-04-26 07:15:06 +08:00
+								        if((ncstat = nc_http_read(file->state,addr,size,bbuf))) {
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								            file->op = H5FD_HTTP_OP_UNKNOWN;
 								            file->pos = HADDR_UNDEF;
 									    ncbytesfree(bbuf); bbuf = NULL;
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								            H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_READERROR, "HTTP byte-range read failed", -1);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								        } /* end if */
 									/* Check that proper number of bytes was read */
 									if(ncbyteslength(bbuf) != size) {
 									    ncbytesfree(bbuf); bbuf = NULL;
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								            H5Epush_ret(func, H5E_ERR_CLS, H5E_IO, H5E_READERROR, "HTTP byte-range read mismatch ", -1);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+									}
 									/* Extract the data from buf */
 									memcpy(buf,ncbytescontents(bbuf),size);
 									ncbytesfree(bbuf);
 								    }
 								    /* Update the file position data. */
 								    file->op = H5FD_HTTP_OP_READ;
 								    file->pos = addr;
 								    return 0;
 								}
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_write
 								 *
 								 * Purpose:  Rejects every write request: this driver provides read-only
 								 *    access, so no bytes from BUF are ever written at ADDR.
 								 *
 								 * Errors:
 								 *    IO    WRITEERROR  always raised ("file is read-only").
 								 *
 								 * Return:  Non-negative on success/Negative on failure
 								 *
 								 * Programmer:  Dennis Heimbigner
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static herr_t
 								H5FD_http_write(H5FD_t *_file, H5FD_mem_t /*UNUSED*/ type, hid_t /*UNUSED*/ dxpl_id,
 								    haddr_t addr, size_t size, const void *buf)
 								{
 								    static const char *func = "H5FD_http_write";  /* Function Name for error reporting */
 								    /* Quiet the compiler */
 								    dxpl_id = dxpl_id;
 								    type = type;
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    /* Always Fails */
-												libhdf5/H5FDhttp: add missing semicolons to H5Epush_ret

In HDF5 1.12.1, this was changed from optional to required per the changelog:

>H5Epush_ret() is a function-like macro that has been changed to
>contain a `do {} while(0)` loop. Consequently, a trailing semicolon
>is now required to end the `while` statement. Previously, a trailing
>semi would work, but was not mandatory.

This should be backward compatible with older versions of HDF5.
											
										
										
											2021-07-19 04:12:47 +08:00
+								    H5Epush_ret (func, H5E_ERR_CLS, H5E_IO, H5E_WRITEERROR, "file is read-only", -1);
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
 								    return 0;
 								}
 								/*-------------------------------------------------------------------------
 								 * Function:  H5FD_http_flush
 								 *
 								 * Purpose:  No-op flush. The function only clears the error stack and
 								 *    reports success; it performs no I/O.
 								 *
 								 * Errors:
 								 *    None raised by this function.
 								 *
 								 * Return:  Non-negative on success/Negative on failure
 								 *
 								 * Programmer:  Robb Matzke
 								 *    Wednesday, October 22, 1997
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static herr_t
-												It turns out that the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#ifdef H5FDCLASS1
 								H5FD_http_flush(H5FD_t *_file, hid_t dxpl_id, unsigned closing)
 								#else
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								H5FD_http_flush(H5FD_t *_file, hid_t /*UNUSED*/ dxpl_id, hbool_t closing)
 								#endif
-												It turns out the the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								{
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
-												It turns out the the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#ifndef H5FDCLASS1
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    /* Quiet the compiler */
 								    dxpl_id = dxpl_id;
-												It turns out the the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#endif
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
 								    return 0;
 								} /* end H5FD_http_flush() */
 								/*-------------------------------------------------------------------------
 								 * Function:    H5FD_http_lock
 								 *
 								 * Purpose:     Locking stub. No lock is taken: the function only clears
 								 *              the error stack and reports success.
 								 *
 								 * Errors:
 								 *    None raised by this function.
 								 *
 								 * Return:      Non-negative on success/Negative on failure
 								 *
 								 * Programmer:  Vailin Choi; March 2015
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static herr_t
-												It turns out the the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#ifdef H5FDCLASS1
 								H5FD_http_lock(H5FD_t *_file, unsigned char* old, unsigned lock_type, hbool_t last)
 								#else
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								H5FD_http_lock(H5FD_t *_file, hbool_t rw)
-												It turns out the the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#endif
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								{
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
-												It turns out the the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#ifdef H5FDCLASS1
 								    /* Quiet the compiler */
 								    lock_type = lock_type;
 								    last = last;
 								#else
 								    rw = rw;
 								#endif
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    return 0;
 								} /* end H5FD_http_lock() */
 								/*-------------------------------------------------------------------------
 								 * Function:    H5FD_http_unlock
 								 *
 								 * Purpose:     Unlock a file.
 								 *              NOTE: This implementation never calls flock(); it only clears
 								 *              the HDF5 error stack and returns 0.
 								 *
 								 * Errors:
 								 *    IO    FCNTL    flock failed.
 								 *
 								 * Return:      Non-negative on success/Negative on failure
 								 *
 								 * Programmer:  Vailin Choi; March 2015
 								 *
 								 *-------------------------------------------------------------------------
 								 */
 								static herr_t
-												It turns out that the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#ifdef H5FDCLASS1
-												typo5

											
										
										
											2019-01-03 12:37:31 +08:00
+								H5FD_http_unlock(H5FD_t *file, /*UNUSED*/unsigned char *oid, /*UNUSED*/ hbool_t last)
-												It turns out that the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#else
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								H5FD_http_unlock(H5FD_t *_file)
-												It turns out that the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								#endif
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								{
 								    /* Clear the error stack */
 								    H5Eclear2(H5E_DEFAULT);
-												It turns out that the type H5FD_class_t was changed
between HDF5 versions 1.8 and 1.10.
So modify H5FDhttp.c to be conditional on the
HDF5 major+minor version from H5public.h

											
										
										
											2019-01-03 05:37:23 +08:00
+								    /* Quiet the compiler */
 								#ifdef H5FDCLASS1
 								    oid = oid;
 								    last = last;
 								#endif
-												Provide byte-range reading of remote datasets

re: issue https://github.com/Unidata/netcdf-c/issues/1251

Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.

This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.

Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.

Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.

An additional goal here is to gain some experience with
the Amazon S3 REST protocol.

This architecture and its use are documented in
the file docs/byterange.dox.

There are currently two test cases:

1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
   for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
   datasets.

This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).

1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs

Other changes:

1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
   fragment tag with a more general mode= tag.

Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.

											
										
										
											2019-01-02 09:27:36 +08:00
+								    return 0;
 								} /* end H5FD_http_unlock() */
 								#ifdef _H5private_H
 								/*
 								 * This is not related to the functionality of the driver code.
 								 * It is added here to trigger warning if HDF5 private definitions are included
 								 * by mistake.  The code should use only HDF5 public API and definitions.
 								 */
 								#error "Do not use HDF5 private definitions"
 								#endif