netcdf-c/oc2/daplex.c
Dennis Heimbigner 49737888ca Improve S3 Documentation and Support
## Improvements to S3 Documentation
* Create a new document *quickstart_paths.md* that give a summary of the legal path formats used by netcdf-c. This includes both file paths and URL paths.
* Modify *nczarr.md* to remove most of the S3 related text.
* Move the S3 text from *nczarr.md* to a new document *cloud.md*.
* Add some S3-related text to the *byterange.md* document.

Hopefully, this will make it easier for users to find the information they want.

## Rebuild NCZarr Testing
In order to avoid problems with running make check in parallel, two changes were made:
1. The *nczarr_test* test system was rebuilt. Now, for each test.
any generated files are kept in a test-specific directory, isolated
from all other test executions.
2. Similarly, since the S3 test bucket is shared, any generated S3 objects
are isolated using a test-specific key path.

## Other S3 Related Changes
* Add code to ensure that files created on S3 are reclaimed at end of testing.
* Used the bash "trap" command to ensure S3 cleanup even if the test fails.
* Cleanup the S3 related configure.ac flag set since S3 is used in several places. So now one should use the option *--enable-s3* instead of *--enable-nczarr-s3*, although the latter is still kept as a deprecated alias for the former.
* Get some of the github actions yml to work with S3; required fixing various test scripts adding a secret to access the Unidata S3 bucket.
* Cleanup S3 portion of libnetcdf.settings.in and netcdf_meta.h.in and test_common.in.
* Merge partial S3 support into dhttp.c.
* Create an experimental s3 access library especially for use with Windows. It is enabled by using the options *--enable-s3-internal* (automake) or *-DENABLE_S3_INTERNAL=ON* (CMake). Also add a unit-test for it.
* Move some definitions from ncrc.h to ncs3sdk.h

## Other Changes
* Provide a default implementation of strlcpy and move this and similar defaults into *dmissing.c*.
2023-04-25 17:15:06 -06:00

392 lines
9.3 KiB
C

/* Copyright 2018, UCAR/Unidata and OPeNDAP, Inc.
See the COPYRIGHT file for more information. */
#include "config.h"
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#include "oc.h"
#include "dapparselex.h"
#include "dapy.h"
#undef URLCVT /* NEVER turn this on */
/* Do we %xx decode all or part of a DAP Identifier: see dapdecode() */
#define DECODE_PARTIAL
#define DAP2ENCODE
#ifdef DAP2ENCODE
#define KEEPSLASH
#endif
/* Forward */
static void dumptoken(DAPlexstate* lexstate);
static void dapaddyytext(DAPlexstate* lex, int c);
#ifndef DAP2ENCODE
static int tohex(int c);
#endif
/****************************************************/
#ifdef INFORMATIONAL
/* Set of all ascii printable characters */
static const char ascii[] = " !\"#$%&'()*+,-./:;<=>?@[]\\^_`|{}~";
/* Define the set of legal nonalphanum characters as specified in the DAP2 spec. */
static const char* daplegal ="_!~*'-\"";
#endif
static const char* ddsworddelims =
"{}[]:;=,";
/* Define 1 and > 1st legal characters */
/* Note: for some reason I added # and removed !~'"
what was I thinking?
*/
static const char* ddswordchars1 =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
"-+_/%\\.*!~'\"";
static const char* ddswordcharsn =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
"-+_/%\\.*!~'\"";
/* This includes sharp and colon for historical reasons */
static const char* daswordcharsn =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
"-+_/%\\.*#:!~'\"";
/* Need to remove '.' to allow for fqns */
static const char* cewordchars1 =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
"-+_/%\\*!~'\"";
static const char* cewordcharsn =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
"-+_/%\\*!~'\"";
/* Current sets of legal characters */
/*
static char* wordchars1 = NULL;
static char* wordcharsn = NULL;
static char* worddelims = NULL;
*/
static const char* keywords[] = {
"alias",
"array",
"attributes",
"byte",
"dataset",
"error",
"float32",
"float64",
"grid",
"int16",
"int32",
"maps",
"sequence",
"string",
"structure",
"uint16",
"uint32",
"url",
"code",
"message",
"program_type",
"program",
NULL /* mark end of the keywords list */
};
static const int keytokens[] = {
SCAN_ALIAS,
SCAN_ARRAY,
SCAN_ATTR,
SCAN_BYTE,
SCAN_DATASET,
SCAN_ERROR,
SCAN_FLOAT32,
SCAN_FLOAT64,
SCAN_GRID,
SCAN_INT16,
SCAN_INT32,
SCAN_MAPS,
SCAN_SEQUENCE,
SCAN_STRING,
SCAN_STRUCTURE,
SCAN_UINT16,
SCAN_UINT32,
SCAN_URL,
SCAN_CODE,
SCAN_MESSAGE,
SCAN_PTYPE,
SCAN_PROG
};
/**************************************************/
int
daplex(YYSTYPE* lvalp, DAPparsestate* state)
{
DAPlexstate* lexstate = state->lexstate;
int token;
int c;
unsigned int i;
char* p;
char* tmp;
YYSTYPE lval = NULL;
token = 0;
ncbytesclear(lexstate->yytext);
/* invariant: p always points to current char */
for(p=lexstate->next;token==0&&(c=*p);p++) {
if(c == '\n') {
lexstate->lineno++;
} else if(c <= ' ' || c == '\177') {
/* whitespace: ignore */
} else if(c == '#') {
/* single line comment */
while((c=*(++p))) {if(c == '\n') break;}
} else if(strchr(lexstate->worddelims,c) != NULL) {
/* don't put in lexstate->yytext to avoid memory leak */
token = c;
} else if(c == '"') {
int more = 1;
/* We have a string token; will be reported as WORD_STRING */
while(more && (c=*(++p))) {
if(c == '"') {
more = 0;
continue;
}
#ifdef DAP2ENCODE
if(c == '\\') {
/* Resolve spec ambiguity about handling of \c:
1. !KEEPSLASH: convert \c to c for any character c
2. KEEPSLASH: convert \c to \c for any character c;
that is, keep the backslash.
It is clear that the problem being addressed was \".
But it is unclear what to to do about \n: convert to
Ascii LF or leave as \n.
This code will leave as \n and assume higher levels
of code will address the issue.
*/
#ifdef KEEPSLASH
dapaddyytext(lexstate,c);
#endif
c=*(++p);
if(c == '\0') more = 0;
}
#else /*Non-standard*/
switch (c) {
case '\\':
c=*(++p);
switch (c) {
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case 'f': c = '\f'; break;
case 't': c = '\t'; break;
case 'x': {
int d1,d2;
c = '?';
++p;
d1 = tohex(*p++);
if(d1 < 0) {
daperror(state,"Illegal \\xDD in TOKEN_STRING");
} else {
d2 = tohex(*p++);
if(d2 < 0) {
daperror(state,"Illegal \\xDD in TOKEN_STRING");
} else {
c=(((unsigned int)d1)<<4) | (unsigned int)d2;
}
}
} break;
default: break;
}
break;
default: break;
}
#endif /*!DAP2ENCODE*/
if(more) dapaddyytext(lexstate,c);
}
token=WORD_STRING;
} else if(strchr(lexstate->wordchars1,c) != NULL) {
int isdatamark = 0;
/* we have a WORD_WORD */
dapaddyytext(lexstate,c);
while((c=*(++p))) {
#ifdef URLCVT
if(c == '%' && p[1] != 0 && p[2] != 0
&& strchr(hexdigits,p[1]) != NULL
&& strchr(hexdigits,p[2]) != NULL) {
int d1,d2;
d1 = tohex(p[1]);
d2 = tohex(p[2]);
if(d1 >= 0 || d2 >= 0) {
c=(((unsigned int)d1)<<4) | (unsigned int)d2;
p+=2;
}
} else {
if(strchr(lexstate->wordcharsn,c) == NULL) {p--; break;}
}
dapaddyytext(lexstate,c);
#else
if(strchr(lexstate->wordcharsn,c) == NULL) {p--; break;}
dapaddyytext(lexstate,c);
#endif
}
/* Special check for Data: */
tmp = ncbytescontents(lexstate->yytext);
if(strcmp(tmp,"Data")==0 && *p == ':') {
dapaddyytext(lexstate,*p); p++;
if(p[0] == '\n') {
token = SCAN_DATA;
isdatamark = 1;
p++;
} else if(p[0] == '\r' && p[1] == '\n') {
token = SCAN_DATA;
isdatamark = 1;
p+=2;
}
}
if(!isdatamark) {
/* check for keyword */
token=WORD_WORD; /* assume */
for(i=0;;i++) {
if(keywords[i] == NULL) break;
if(strcasecmp(keywords[i],tmp)==0) {
token=keytokens[i];
break;
}
}
}
} else { /* illegal */
}
}
lexstate->next = p;
strncpy(lexstate->lasttokentext,ncbytescontents(lexstate->yytext),MAX_TOKEN_LENGTH);
lexstate->lasttoken = token;
if(ocdebug >= 2)
dumptoken(lexstate);
/*Put return value onto Bison stack*/
if(ncbyteslength(lexstate->yytext) == 0)
lval = NULL;
else {
lval = ncbytesdup(lexstate->yytext);
nclistpush(lexstate->reclaim,(void*)lval);
}
if(lvalp) *lvalp = lval;
return token; /* Return the type of the token. */
}
static void
dapaddyytext(DAPlexstate* lex, int c)
{
ncbytesappend(lex->yytext,c);
ncbytesnull(lex->yytext);
}
#ifndef DAP2ENCODE
static int
tohex(int c)
{
if(c >= 'a' && c <= 'f') return (c - 'a') + 0xa;
if(c >= 'A' && c <= 'F') return (c - 'A') + 0xa;
if(c >= '0' && c <= '9') return (c - '0');
return -1;
}
#endif
static void
dumptoken(DAPlexstate* lexstate)
{
fprintf(stderr,"TOKEN = |%s|\n",ncbytescontents(lexstate->yytext));
}
/*
Simple lexer
*/
void
dapsetwordchars(DAPlexstate* lexstate, int kind)
{
switch (kind) {
case 0:
lexstate->worddelims = ddsworddelims;
lexstate->wordchars1 = ddswordchars1;
lexstate->wordcharsn = ddswordcharsn;
break;
case 1:
lexstate->worddelims = ddsworddelims;
lexstate->wordchars1 = ddswordchars1;
lexstate->wordcharsn = daswordcharsn;
break;
case 2:
lexstate->worddelims = ddsworddelims;
lexstate->wordchars1 = cewordchars1;
lexstate->wordcharsn = cewordcharsn;
break;
default: break;
}
}
void
daplexinit(char* input, DAPlexstate** lexstatep)
{
DAPlexstate* lexstate;
if(lexstatep == NULL) return; /* no point in building it */
lexstate = (DAPlexstate*)malloc(sizeof(DAPlexstate));
*lexstatep = lexstate;
if(lexstate == NULL) return;
memset((void*)lexstate,0,sizeof(DAPlexstate));
lexstate->input = strdup(input);
lexstate->next = lexstate->input;
lexstate->yytext = ncbytesnew();
lexstate->reclaim = nclistnew();
dapsetwordchars(lexstate,0); /* Assume DDS */
}
void
daplexcleanup(DAPlexstate** lexstatep)
{
DAPlexstate* lexstate = *lexstatep;
if(lexstate == NULL) return;
if(lexstate->input != NULL) ocfree(lexstate->input);
if(lexstate->reclaim != NULL) {
while(nclistlength(lexstate->reclaim) > 0) {
char* word = (char*)nclistpop(lexstate->reclaim);
if(word) free(word);
}
nclistfree(lexstate->reclaim);
}
ncbytesfree(lexstate->yytext);
free(lexstate);
*lexstatep = NULL;
}
/* Dap identifiers will come to us with some
characters escaped using the URL notation of
%HH. The assumption here is that any character
that is encoded is left encoded, except as follows:
1. if the encoded character is in fact a legal DAP2 character
(alphanum+"_!~*'-\"") then it is decoded, otherwise not.
*/
#ifdef DECODE_PARTIAL
static const char* decodeset = /* Specify which characters are decoded */
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_!~*'-\"@";
#endif
char*
dapdecode(DAPlexstate* lexstate, char* name)
{
char* decoded = NULL;
#ifdef DECODE_PARTIAL
decoded = ncuridecodepartial(name,decodeset); /* Decode selected */
#else
decoded = ncuridecode(name); /* Decode everything */
#endif
nclistpush(lexstate->reclaim,(void*)decoded);
return decoded;
}