Fix URL encoding in DAP2 url processing

re: GitHub issue https://github.com/Unidata/netcdf-c/issues/1832 and GitHub issue https://github.com/Unidata/netcdf4-python/issues/1041. Handling of URL escape sequences for some servers (e.g. http://iridl.ldeo.columbia.edu) appears to be somewhat non-standard. In particular, these servers need certain characters escaped that other servers do not. Fortunately, the changes should also work with other existing servers.
2024-11-27 07:30:33 +08:00 · 2020-09-08 12:41:12 -06:00 · 2020-09-08 12:41:12 -06:00 · c3c89693c4
commit c3c89693c4
parent cb6e990cd1
4 changed files with 24 additions and 4 deletions
--- a/NUG/DAP2.dox
+++ b/NUG/DAP2.dox
@ -672,6 +672,14 @@ entries should have same value, which is the file path for the
 certificate produced by MyProxyLogon. The HTTP.SSL.CAPATH entry should
 be the path to the "certificates" directory produced by MyProxyLogon.

+## URL escaping
+
+The DAP2 standard specifies that URL %xx escaping is also used as
+the escape mechanism for DAP2 itself. This can cause some confusion.
+To try to simplify this, the netcdf-c library DAP2 code assumes
+that all URL path and constraint information is unescaped. It is assumed
+that just before transmission, the constructed URL will be properly escaped.
+
 # Point of Contact {#dap2_poc}

 __Author__: Dennis Heimbigner<br>
--- a/libdap2/dapdebug.h
+++ b/libdap2/dapdebug.h
@ -6,7 +6,7 @@
 #define DEBUG_H

 /* Warning: setting CATCHERROR has significant performance impact */
-#define CATCHERROR
+#undef CATCHERROR

 #if 0
 #define DAPDEBUG 2
--- a/libdispatch/ncuri.c
+++ b/libdispatch/ncuri.c
@ -57,6 +57,16 @@
 #define rshift(buf,buflen) {memmove(buf+1,buf,buflen+1);}

 /* Allowable character sets for encode */
+
+/* ascii = " !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~" */
+
+/* Character classes according to the URL RFC */
+#define RFCRESERVED " !*'();:@&=+$,/?#[]"
+#define RFCUNRESERVED "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~"
+#define RFCOTHER "\"%<>\\^`{|}"
+
+/* I really hate the URL encoding mess */
+
 static const char* pathallow =
 "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!#$&'()*+,-./:;=?@_~";

@ -868,9 +878,7 @@ ncuriencodeonly(const char* s, const char* allowable)

    for(inptr=s,outptr=encoded;*inptr;) {
 	int c = *inptr++;
-        if(c == ' ') {
-	    *outptr++ = '+';
-        } else {
+	{
            /* search allowable */
 	    char* p = strchr(allowable,c);
 	    if(p != NULL) {
--- a/ncdump/test_unicode_directory.sh
+++ b/ncdump/test_unicode_directory.sh
@ -17,6 +17,10 @@ ERR() {
    fi
 }

+LC_ALL="C.UTF-8"
+export LC_ALL
+
+#UNISTRING='海'
 UNISTRING=$(echo '\xe6\xb5\xb7')

 echo ""