netcdf-c/libncxml/ncxml_xml2.c
Dennis Heimbigner 69e84fe9f1 Fix byterange handling of some URLS
re: Issue

The byterange handling of the following URLs fails.

### Problem 1: "https://crudata.uea.ac.uk/cru/data/temperature/HadCRUT.4.6.0.0.median.nc#mode=bytes"
It turns out that byterange in hdf5 has two possible targets: S3 and not-S3 (e.g. a thredds server or the crudata URL above). Each uses a different HDF5 Virtual File Driver (VFD).
I incorrectly set up the byterange code in libhdf5 so that it would choose one or the other of the two VFDs for any netcdf-c library build. The fix is to allow it to choose either one at run-time.

### Problem 2: "https://noaa-goes16.s3.amazonaws.com/ABI-L1b-RadF/2022/001/18/OR_ABI-L1b-RadF-M6C01_G16_s20220011800205_e20220011809513_c20220011809562.nc#mode=bytes,s3"
When given what appears to be an S3-related URL, the netcdf-c library code converts it into a canonical, so-called "path" format. In casing out the possible input URL formats, I missed the case where the host contains the bucket ("noaa-goes16"), but not the region. So the fix was to check for this case.

## Misc. Related Changes
1. Since S3 is used in more than just NCZarr, I changed the automake/cmake options to replace "--enable-nczarr-s3" with "--enable-s3", but keeping the former option as a synonym for the latter. This also entailed cleaning up libnetcdf.settings WRT S3 support
2. Added the above URLs as additional test cases

## Misc. Un-Related Changes
1. CURLOPT_PUT is deprecated in favor of CURLOPT_UPLOAD
2. Fix some minor warnings

## Open Problems
* Under Ubuntu, either libcrypto or aws-sdk-cpp has a memory leak.
2023-03-02 19:51:02 -07:00

162 lines
3.3 KiB
C

/* Copyright 2018-2018 University Corporation for Atmospheric Research/Unidata. */
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <libxml2/libxml/parser.h>
#include <libxml/tree.h>
#include "ncxml.h"
/* Duplicate a string with strdup(), mapping a NULL argument to NULL.
   The argument expression is evaluated twice, so pass only expressions
   without side effects. Defined here only if no earlier header has
   already provided a nulldup macro. */
#ifndef nulldup
#define nulldup(s) ((s)?strdup(s):NULL)
#endif
/* State flag: set to 1 by ncxml_initialize(), reset to 0 by ncxml_finalize(). */
static int ncxml_initialized = 0;

/**
 * Mark the XML wrapper as initialized.
 * Only the state flag is updated; no libxml2 routine is called here.
 */
void
ncxml_initialize(void)
{
    ncxml_initialized = 1;
}
/**
 * Shut the XML wrapper down: call xmlCleanupParser() and clear the
 * initialized flag.
 */
void
ncxml_finalize(void)
{
    xmlCleanupParser();
    ncxml_initialized = 0;
}
/**
 * Parse an XML document held in memory.
 *
 * @param contents buffer holding the XML text
 * @param len      number of bytes in contents
 * @return opaque document handle (the xmlDocPtr produced by
 *         xmlReadMemory()), or NULL if contents is NULL, len does not fit
 *         in an int, or xmlReadMemory() returns NULL.
 *         Release the handle with ncxml_free().
 */
ncxml_doc_t
ncxml_parse(char* contents, size_t len)
{
    xmlDocPtr doc = NULL; /* the resulting document tree */

    /* xmlReadMemory() takes the size as an int; refuse any length that the
       cast below would truncate or turn negative. */
    if(contents == NULL || len > (size_t)INT_MAX)
        return NULL;
    doc = xmlReadMemory(contents, (int)len, "dap4.xml", NULL, 0);
    return (ncxml_doc_t)doc;
}
/**
 * Release a document handle by passing it to xmlFreeDoc().
 *
 * @param doc0 handle previously returned by ncxml_parse()
 */
void
ncxml_free(ncxml_doc_t doc0)
{
    xmlFreeDoc((xmlDoc*)doc0);
}
/**
 * Get the root element of a parsed document.
 *
 * @param doc0 document handle
 * @return whatever xmlDocGetRootElement() reports for the document,
 *         as an opaque node handle
 */
ncxml_t
ncxml_root(ncxml_doc_t doc0)
{
    xmlNode* root = xmlDocGetRootElement((xmlDoc*)doc0);

    return (ncxml_t)root;
}
/**
 * Get the name of a node.
 *
 * @param xml0 node handle; may be NULL
 * @return the node's name field (not a copy; do not free), or NULL when
 *         xml0 is NULL
 */
const char*
ncxml_name(ncxml_t xml0)
{
    xmlNode* node = (xmlNode*)xml0;

    if(node == NULL)
        return NULL;
    return (const char*)node->name;
}
/**
 * Look up an attribute value on a node.
 *
 * @param xml0 node handle
 * @param key  attribute name
 * @return a strdup()'d copy of the value reported by xmlGetProp(), or NULL
 *         if xmlGetProp() returns NULL or the copy fails; the caller frees
 *         the result with free()
 */
char*
ncxml_attr(ncxml_t xml0, const char* key)
{
    xmlChar* prop = xmlGetProp((xmlNode*)xml0, (const xmlChar*)key);
    char* copy = nulldup((char*)prop);

    /* The libxml2 string is released here; only the copy is returned. */
    xmlFree(prop);
    return copy;
}
/**
 * First child by name.
 *
 * @param xml0 parent node handle; may be NULL
 * @param name element name to match exactly (strcmp)
 * @return the first child of type XML_ELEMENT_NODE whose name equals name,
 *         or NULL if xml0 or name is NULL or no child matches
 */
ncxml_t
ncxml_child(ncxml_t xml0, const char* name)
{
    xmlNode* xml = (xmlNode*)xml0;
    xmlNode* child = NULL;

    /* Same NULL tolerance as ncxml_child_first(); strcmp() on a NULL
       name would be undefined behavior. */
    if(xml == NULL || name == NULL) return NULL;
    for(child = xml->children; child != NULL; child = child->next) {
        if(child->type != XML_ELEMENT_NODE) continue; /* skip non-element nodes */
        if(strcmp((const char*)child->name, name) == 0)
            return (ncxml_t)child;
    }
    return NULL;
}
/**
 * Next sibling by name.
 *
 * @param xml0 node handle to start from (the node itself is not tested);
 *             may be NULL
 * @param name element name to match exactly (strcmp)
 * @return the first following sibling of type XML_ELEMENT_NODE whose name
 *         equals name, or NULL if xml0 or name is NULL or none matches
 */
ncxml_t
ncxml_next(ncxml_t xml0, const char* name)
{
    xmlNode* xml = (xmlNode*)xml0;
    xmlNode* next = NULL;

    /* Same NULL tolerance as ncxml_child_next(); strcmp() on a NULL
       name would be undefined behavior. */
    if(xml == NULL || name == NULL) return NULL;
    for(next = xml->next; next != NULL; next = next->next) {
        if(next->type != XML_ELEMENT_NODE) continue; /* skip non-element nodes */
        if(strcmp((const char*)next->name, name) == 0)
            return (ncxml_t)next;
    }
    return NULL;
}
/**
 * Get the content of a node as a freshly allocated C string.
 *
 * @param xml0 node handle; may be NULL
 * @return a strdup()'d copy of the string from xmlNodeGetContent(), or NULL
 *         if xml0 is NULL, no content is returned, or the copy fails; the
 *         caller frees the result with free()
 */
char*
ncxml_text(ncxml_t xml0)
{
    xmlChar* content = NULL;
    char* copy = NULL;

    if(xml0 != NULL) {
        content = xmlNodeGetContent((xmlNode*)xml0);
        copy = nulldup((char*)content);
        /* The libxml2 string is released here; only the copy is returned. */
        xmlFree(content);
    }
    return copy;
}
/* Nameless versions of child and next */
/**
 * First child element of a node, regardless of its name.
 *
 * @param xml0 parent node handle; may be NULL
 * @return the first child of type XML_ELEMENT_NODE, or NULL if xml0 is NULL
 *         or it has no element children
 */
ncxml_t
ncxml_child_first(ncxml_t xml0)
{
    xmlNode* parent = (xmlNode*)xml0;
    xmlNode* kid = (parent != NULL ? parent->children : NULL);

    /* Step over non-element nodes until an element or the end is reached. */
    while(kid != NULL && kid->type != XML_ELEMENT_NODE)
        kid = kid->next;
    return (ncxml_t)kid;
}
/**
 * Next sibling element of a node, regardless of its name.
 *
 * @param xml0 node handle to start from (the node itself is not tested);
 *             may be NULL
 * @return the first following sibling of type XML_ELEMENT_NODE, or NULL if
 *         xml0 is NULL or no such sibling exists
 */
ncxml_t
ncxml_child_next(ncxml_t xml0)
{
    xmlNode* sib = (xmlNode*)xml0;

    if(sib == NULL)
        return NULL;
    /* Always advance at least once so the starting node is never returned. */
    do {
        sib = sib->next;
    } while(sib != NULL && sib->type != XML_ELEMENT_NODE);
    return (ncxml_t)sib;
}
/**
 * Collect all attributes of a node as a flat list of name/value strings.
 *
 * On success with a non-NULL pairsp, *pairsp receives a malloc'd array of
 * 2*count+1 entries: entry 2k is the name of the k-th attribute, entry
 * 2k+1 is its value, and the last entry is NULL. Each string is a separate
 * strdup()'d copy; the caller frees the strings and then the array.
 * A value entry is NULL when xmlNodeListGetString() returns NULL for that
 * attribute.
 *
 * @param xml0   node handle; may be NULL
 * @param pairsp where to store the list; may be NULL, in which case nothing
 *               is handed out
 * @return 1 on success; 0 if xml0 is NULL or an allocation fails (in which
 *         case *pairsp is left untouched)
 */
int
ncxml_attr_pairs(ncxml_t xml0, char*** pairsp)
{
    char** pairs = NULL;
    xmlNode* xml = (xmlNode*)xml0;
    xmlAttr* attr = NULL;
    int i, count = 0;
    int ok = 1;

    if(xml == NULL) return 0;
    /* First count */
    for(attr = xml->properties; attr; attr = attr->next) count++;
    /* Allocate zero-filled: the terminator is already in place and the
       cleanup below can free every slot even after a partial fill. */
    pairs = (char**)calloc((2*(size_t)count)+1, sizeof(char*));
    if(pairs == NULL) return 0;
    /* Collect */
    for(i = 0, attr = xml->properties; attr; i += 2, attr = attr->next) {
        xmlChar* value = NULL;
        pairs[i] = nulldup((char*)attr->name);
        /* A NULL name slot would look like the list terminator, so a failed
           copy must be reported rather than stored. */
        if(pairs[i] == NULL && attr->name != NULL) {ok = 0; break;}
        value = xmlNodeListGetString(xml->doc, attr->children, 1);
        pairs[i+1] = nulldup((char*)value);
        if(pairs[i+1] == NULL && value != NULL) ok = 0;
        xmlFree(value);
        if(!ok) break;
    }
    if(ok && pairsp != NULL) {
        *pairsp = pairs; /* ownership passes to the caller */
        return 1;
    }
    /* Either a copy failed or the caller supplied no place to store the
       list; nobody owns it, so release it here instead of leaking it. */
    for(i = 0; i < 2*count; i++) free(pairs[i]);
    free(pairs);
    return ok;
}