netcdf-c/oc2/daplex.c
Dennis Heimbigner 3db4f013bf Primary change: add dap4 support
Specific changes:
1. Add dap4 code: libdap4 and dap4_test.
   Note that until the d4ts server problem is solved, dap4 is turned off.
2. Modify various files to support dap4 flags:
	configure.ac, Makefile.am, CMakeLists.txt, etc.
3. Add nc_test/test_common.sh. This centralizes
   the handling of the locations of various
   things in the build tree: e.g. where is
   ncgen.exe located. See nc_test/test_common.sh
   for details.
4. Modify .sh files to use test_common.sh
5. Obsolete separate oc2 by moving it to be part of
   netcdf-c. This means replacing code with netcdf-c
   equivalents.
5. Add --with-testserver to configure.ac to allow
   override of the servers to be used for --enable-dap-remote-tests.
6. There were multiple versions of nctypealignment code. Try to
   centralize in libdispatch/doffset.c and include/ncoffsets.h
7. Add a unit test for the ncuri code because of its complexity.
8. Move the findserver code out of libdispatch and into
   a separate, self contained program in ncdap_test and dap4_test.
9. Move the dispatch header files (nc{3,4}dispatch.h) to
   .../include because they are now shared by modules.
10. Revamp the handling of TOPSRCDIR and TOPBUILDDIR for shell scripts.
11. Make use of MREMAP if available
12. Misc. minor changes e.g.
	- #include <config.h> -> #include "config.h"
	- Add some no-install headers to /include
	- extern -> EXTERNL and vice versa as needed
	- misc header cleanup
	- clean up checking for misc. unix vs microsoft functions
13. Change copyright decls in some files to point to LICENSE file.
14. Add notes to RELEASENOTES.md
2017-03-08 17:01:10 -07:00

375 lines
8.8 KiB
C

/* Copyright 2009, UCAR/Unidata and OPeNDAP, Inc.
See the COPYRIGHT file for more information. */
#include "config.h"
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#include "dapparselex.h"
#include "dapy.h"
#undef URLCVT /* NEVER turn this on */
#define DAP2ENCODE
#ifdef DAP2ENCODE
#define KEEPSLASH
#endif
/* Forward */
static void dumptoken(DAPlexstate* lexstate);
static void dapaddyytext(DAPlexstate* lex, int c);
#ifndef DAP2ENCODE
static int tohex(int c);
#endif
/****************************************************/
#ifdef INFORMATIONAL
/* Set of all ascii printable characters */
static char ascii[] = " !\"#$%&'()*+,-./:;<=>?@[]\\^_`|{}~";
/* Define the set of legal nonalphanum characters as specified in the DAP2 spec. */
static char* daplegal ="_!~*'-\"";
#endif
static char* ddsworddelims =
"{}[]:;=,";
/* Define 1 and > 1st legal characters */
static char* ddswordchars1 =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-+_/%\\.*";
static char* ddswordcharsn =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-+_/%\\.*#";
static char* daswordcharsn =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-+_/%\\.*#:";
static char* cewordchars1 =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-+_/%\\";
static char* cewordcharsn =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-+_/%\\";
/* Current sets of legal characters */
/*
static char* wordchars1 = NULL;
static char* wordcharsn = NULL;
static char* worddelims = NULL;
*/
static char* keywords[] = {
"alias",
"array",
"attributes",
"byte",
"dataset",
"error",
"float32",
"float64",
"grid",
"int16",
"int32",
"maps",
"sequence",
"string",
"structure",
"uint16",
"uint32",
"url",
"code",
"message",
"program_type",
"program",
NULL /* mark end of the keywords list */
};
static int keytokens[] = {
SCAN_ALIAS,
SCAN_ARRAY,
SCAN_ATTR,
SCAN_BYTE,
SCAN_DATASET,
SCAN_ERROR,
SCAN_FLOAT32,
SCAN_FLOAT64,
SCAN_GRID,
SCAN_INT16,
SCAN_INT32,
SCAN_MAPS,
SCAN_SEQUENCE,
SCAN_STRING,
SCAN_STRUCTURE,
SCAN_UINT16,
SCAN_UINT32,
SCAN_URL,
SCAN_CODE,
SCAN_MESSAGE,
SCAN_PTYPE,
SCAN_PROG
};
/**************************************************/
int
daplex(YYSTYPE* lvalp, DAPparsestate* state)
{
DAPlexstate* lexstate = state->lexstate;
int token;
int c;
unsigned int i;
char* p;
char* tmp;
YYSTYPE lval = NULL;
token = 0;
ncbytesclear(lexstate->yytext);
/* invariant: p always points to current char */
for(p=lexstate->next;token==0&&(c=*p);p++) {
if(c == '\n') {
lexstate->lineno++;
} else if(c <= ' ' || c == '\177') {
/* whitespace: ignore */
} else if(c == '#') {
/* single line comment */
while((c=*(++p))) {if(c == '\n') break;}
} else if(strchr(lexstate->worddelims,c) != NULL) {
/* don't put in lexstate->yytext to avoid memory leak */
token = c;
} else if(c == '"') {
int more = 1;
/* We have a string token; will be reported as WORD_STRING */
while(more && (c=*(++p))) {
if(c == '"') {
more = 0;
continue;
}
#ifdef DAP2ENCODE
if(c == '\\') {
/* Resolve spec ambiguity about handling of \c:
1. !KEEPSLASH: convert \c to c for any character c
2. KEEPSLASH: convert \c to \c for any character c;
that is, keep the backslash.
It is clear that the problem being addressed was \".
But it is unclear what to to do about \n: convert to
Ascii LF or leave as \n.
This code will leave as \n and assume higher levels
of code will address the issue.
*/
#ifdef KEEPSLASH
dapaddyytext(lexstate,c);
#endif
c=*(++p);
if(c == '\0') more = 0;
}
#else /*Non-standard*/
switch (c) {
case '\\':
c=*(++p);
switch (c) {
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case 'f': c = '\f'; break;
case 't': c = '\t'; break;
case 'x': {
int d1,d2;
c = '?';
++p;
d1 = tohex(*p++);
if(d1 < 0) {
daperror(state,"Illegal \\xDD in TOKEN_STRING");
} else {
d2 = tohex(*p++);
if(d2 < 0) {
daperror(state,"Illegal \\xDD in TOKEN_STRING");
} else {
c=(((unsigned int)d1)<<4) | (unsigned int)d2;
}
}
} break;
default: break;
}
break;
default: break;
}
#endif /*!DAP2ENCODE*/
if(more) dapaddyytext(lexstate,c);
}
token=WORD_STRING;
} else if(strchr(lexstate->wordchars1,c) != NULL) {
int isdatamark = 0;
/* we have a WORD_WORD */
dapaddyytext(lexstate,c);
while((c=*(++p))) {
#ifdef URLCVT
if(c == '%' && p[1] != 0 && p[2] != 0
&& strchr(hexdigits,p[1]) != NULL
&& strchr(hexdigits,p[2]) != NULL) {
int d1,d2;
d1 = tohex(p[1]);
d2 = tohex(p[2]);
if(d1 >= 0 || d2 >= 0) {
c=(((unsigned int)d1)<<4) | (unsigned int)d2;
p+=2;
}
} else {
if(strchr(lexstate->wordcharsn,c) == NULL) {p--; break;}
}
dapaddyytext(lexstate,c);
#else
if(strchr(lexstate->wordcharsn,c) == NULL) {p--; break;}
dapaddyytext(lexstate,c);
#endif
}
/* Special check for Data: */
tmp = ncbytescontents(lexstate->yytext);
if(strcmp(tmp,"Data")==0 && *p == ':') {
dapaddyytext(lexstate,*p); p++;
if(p[0] == '\n') {
token = SCAN_DATA;
isdatamark = 1;
p++;
} else if(p[0] == '\r' && p[1] == '\n') {
token = SCAN_DATA;
isdatamark = 1;
p+=2;
}
}
if(!isdatamark) {
/* check for keyword */
token=WORD_WORD; /* assume */
for(i=0;;i++) {
if(keywords[i] == NULL) break;
if(strcasecmp(keywords[i],tmp)==0) {
token=keytokens[i];
break;
}
}
}
} else { /* illegal */
}
}
lexstate->next = p;
strncpy(lexstate->lasttokentext,ncbytescontents(lexstate->yytext),MAX_TOKEN_LENGTH);
lexstate->lasttoken = token;
if(ocdebug >= 2)
dumptoken(lexstate);
/*Put return value onto Bison stack*/
if(ncbyteslength(lexstate->yytext) == 0)
lval = NULL;
else {
lval = ncbytesdup(lexstate->yytext);
nclistpush(lexstate->reclaim,(void*)lval);
}
if(lvalp) *lvalp = lval;
return token; /* Return the type of the token. */
}
static void
dapaddyytext(DAPlexstate* lex, int c)
{
ncbytesappend(lex->yytext,c);
}
#ifndef DAP2ENCODE
static int
tohex(int c)
{
if(c >= 'a' && c <= 'f') return (c - 'a') + 0xa;
if(c >= 'A' && c <= 'F') return (c - 'A') + 0xa;
if(c >= '0' && c <= '9') return (c - '0');
return -1;
}
#endif
static void
dumptoken(DAPlexstate* lexstate)
{
fprintf(stderr,"TOKEN = |%s|\n",ncbytescontents(lexstate->yytext));
}
/*
Simple lexer
*/
void
dapsetwordchars(DAPlexstate* lexstate, int kind)
{
switch (kind) {
case 0:
lexstate->worddelims = ddsworddelims;
lexstate->wordchars1 = ddswordchars1;
lexstate->wordcharsn = ddswordcharsn;
break;
case 1:
lexstate->worddelims = ddsworddelims;
lexstate->wordchars1 = ddswordchars1;
lexstate->wordcharsn = daswordcharsn;
break;
case 2:
lexstate->worddelims = ddsworddelims;
lexstate->wordchars1 = cewordchars1;
lexstate->wordcharsn = cewordcharsn;
break;
default: break;
}
}
void
daplexinit(char* input, DAPlexstate** lexstatep)
{
DAPlexstate* lexstate;
if(lexstatep == NULL) return; /* no point in building it */
lexstate = (DAPlexstate*)malloc(sizeof(DAPlexstate));
*lexstatep = lexstate;
if(lexstate == NULL) return;
memset((void*)lexstate,0,sizeof(DAPlexstate));
lexstate->input = strdup(input);
lexstate->next = lexstate->input;
lexstate->yytext = ncbytesnew();
lexstate->reclaim = nclistnew();
dapsetwordchars(lexstate,0); /* Assume DDS */
}
void
daplexcleanup(DAPlexstate** lexstatep)
{
DAPlexstate* lexstate = *lexstatep;
if(lexstate == NULL) return;
if(lexstate->input != NULL) ocfree(lexstate->input);
if(lexstate->reclaim != NULL) {
while(nclistlength(lexstate->reclaim) > 0) {
char* word = (char*)nclistpop(lexstate->reclaim);
if(word) free(word);
}
nclistfree(lexstate->reclaim);
}
ncbytesfree(lexstate->yytext);
free(lexstate);
*lexstatep = NULL;
}
/* Dap identifiers will come to us with some
characters escaped using the URL notation of
%HH. The assumption here is that any character
that is encoded is left encoded, except as follows:
1. if the encoded character is in fact a legal DAP2 character
(alphanum+"_!~*'-\"") then it is decoded, otherwise not.
*/
#ifndef DECODE_IDENTIFIERS
static char* decodelist =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_!~*'-\"";
#endif
char*
dapdecode(DAPlexstate* lexstate, char* name)
{
char* decoded = NULL;
#ifdef DECODE_IDENTIFIERS
decoded = ncuridecode(name);
#else
decoded = ncuridecodeonly(name,decodelist);
#endif
nclistpush(lexstate->reclaim,(void*)decoded);
return decoded;
}