From 12ec5711d74f48b61e5d49cf1de4f03cd80e8ddb Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Sat, 10 Jun 2023 18:51:13 -0600 Subject: [PATCH] Fix some problems with Earthdata authorization. re: Issue https://github.com/Unidata/netcdf-c/issues/2704 The issue reported problems accessing e.g. opendap.earthdata.nasa.gov, which uses the authentication mechanisms of urs.earthdata.nasa.gov. The file *docs/auth.md* describes how to setup the proper authorization mechanisms for earthdata, but there turned out to be some bugs in the code that prevented this from working. ## Primary Changes * Add some clarification text to *auth.md*. * Fix the process for loading and merging *.ncrc* and *.dodsrc* file to conform to documentation. * Fix *NC_s3urlrebuild* so that non-S3 urls are passed through unchanged. * Fix a bug in the .rc test *test_rcmerge.sh*. --- docs/auth.md | 101 ++++++++++++++++++++--------------------- libdispatch/drc.c | 28 +++++------- libdispatch/ds3util.c | 8 ++-- ncdump/test_rcmerge.sh | 2 +- unit_test/Makefile.am | 2 +- 5 files changed, 68 insertions(+), 73 deletions(-) diff --git a/docs/auth.md b/docs/auth.md index 75bd5d5a3..dfb49b326 100644 --- a/docs/auth.md +++ b/docs/auth.md @@ -13,17 +13,14 @@ NetCDF Authorization Support {#auth} netCDF can support user authorization using the facilities provided by the curl library. This includes basic password authentication as well as certificate-based authorization. - -At the moment, this document only applies to DAP2 and DAP4 access -because they are (for now) the only parts of the netCDF-C library -that uses libcurl. +At the moment, this document only applies to DAP2 and DAP4 access. With some exceptions (e.g. see the section on redirection) The libcurl authorization mechanisms can be accessed in two ways 1. Inserting the username and password into the url, or 2. Accessing information from a so-called _rc_ file named either - `.ncrc` or `.dodsrc`. 
The latter is deprecated, but will be supported indefinitely. + `.ncrc` or `.dodsrc`. The latter is historical and deprecated, but will be supported indefinitely. ## URL-Based Authentication {#auth_url} @@ -52,29 +49,18 @@ Locating the _rc_ file is a multi-step process. ### Search Order -The file must be called one of the following names: -".daprc" or ".dodsrc". -If both ".daprc" and ".dodsrc" exist, then -the ".daprc" file will take precedence. +The netcdf-c library searches for, and loads from, the following files, +in this order: +1. $HOME/.ncrc +2. $HOME/.dodsrc +3. $CWD/.ncrc +4. $CWD/.dodsrc -It is strongly suggested that you pick one of the two names -and use it always. Otherwise you may observe unexpected results -when the netcdf-c library finds one that you did not intend. +*$HOME* is the user's home directory and *$CWD* is the current working directory. Entries in later files override any of the earlier files -The search for an _rc_ file looks in the following places in this order. - -1. Check for the environment variable named _DAPRCFILE_. - This will specify the full path for the _rc_ file - (not just the containing directory). -2. Search the current working directory (`./`) looking - for (in order) .daprc or .dodsrc. -3. Search the HOME directory (`$HOME`) looking - for (in order) .daprc or .dodsrc. The HOME environment - variable is used to define the directory in which to search. - -It is strongly suggested that you pick a uniform location -and use it always. Otherwise you may observe unexpected results -when the netcdf-c library get an rc file you did not expect. +It is strongly suggested that you pick a uniform location and a uniform name +and use them always. Otherwise you may observe unexpected results +when the netcdf-c library loads an rc file you did not expect. ### RC File Format @@ -86,22 +72,18 @@ where the bracket-enclosed host:port is optional. 
### URL Constrained RC File Entries -Each line of the rc file can begin with -a host+port enclosed in square brackets. -The form is "host:port". -If the port is not specified -then the form is just "host". -The reason that more of the url is not used is that -libcurl's authorization grain is not any finer than host level. - -Examples. +Each line of the rc file can begin with a host+port enclosed in +square brackets. The form is "host:port". If the port is not +specified then the form is just "host". The reason that more of +the url is not used is that libcurl's authorization grain is not +any finer than host level. +Here are some examples. +```` [remotetest.unidata.ucar.edu]HTTP.VERBOSE=1 - or - [fake.ucar.edu:9090]HTTP.VERBOSE=0 - +```` If the url request from, say, the _netcdf_open_ method has a host+port matching one of the prefixes in the rc file, then the corresponding entry will be used, otherwise ignored. @@ -109,15 +91,15 @@ This means that an entry with a matching host+port will take precedence over an entry without a host+port. For example, the URL - +```` http://remotetest.unidata.ucar.edu/thredds/dodsC/testdata/testData.nc - +```` will have HTTP.VERBOSE set to 1 because its host matches the example above. Similarly, - +```` http://fake.ucar.edu:9090/dts/test.01 - +```` will have HTTP.VERBOSE set to 0 because its host+port matches the example above. ## Authorization-Related Keys {#auth_keys} @@ -128,7 +110,7 @@ The second column is the affected curl_easy_setopt option(s), if any - + @@ -141,7 +123,7 @@ The second column is the affected curl_easy_setopt option(s), if any - +
KeyAffected curl_easy_setopt OptionsNotes
HTTP.COOKIEJARCURLOPT_COOKIEJAR
HTTP.COOKIEFILECURLOPT_COOKIEJARAlias for CURLOPT_COOKIEJAR
HTTP.COOKIEFILECURLOPT_COOKIEJARCOOKIEJAR and COOKIEFILE are considered aliases, so setting one will set the other as well.
HTTP.PROXY.SERVERCURLOPT_PROXY, CURLOPT_PROXYPORT, CURLOPT_PROXYUSERPWD
HTTP.PROXY_SERVERCURLOPT_PROXY, CURLOPT_PROXYPORT, CURLOPT_PROXYUSERPWDDeprecated: use HTTP.PROXY.SERVER
HTTP.SSL.CERTIFICATECURLOPT_SSLCERT
HTTP.CREDENTIALS.USERPASSWORDCURLOPT_USERPASSWORD
HTTP.CREDENTIALS.USERNAMECURLOPT_USERNAME
HTTP.CREDENTIALS.PASSWORDCURLOPT_PASSWORD
HTTP.NETRCN.A.Specify path of the .netrc file
HTTP.NETRCCURLOPT_NETRC, CURLOPT_NETRC_FILESpecify the path of the .netrc file to use and enable its use.
AWS.PROFILEN.A.Specify name of a profile from the .aws/credentials file
AWS.REGIONN.A.Specify name of a default region
@@ -188,7 +170,7 @@ HTTP.SSL.KEY is essentially the same as HTTP.SSL.CERTIFICATE and should always have the same value. HTTP.SSL.KEYPASSWORD -specifies the password for accessing the HTTP.SSL.CERTIFICAT/HTTP.SSL.key file. +specifies the password for accessing the HTTP.SSL.CERTIFICATE/HTTP.SSL.key file. HTTP.SSL.CAPATH specifies the path to a directory containing @@ -207,9 +189,10 @@ HTTP.PROXY_SERVER deprecated; use HTTP.PROXY.SERVER HTTP.NETRC -specifies the absolute path of the .netrc file. +specifies the absolute path of the .netrc file, +and causes it to be used instead of username and password. See [redirection authorization](#REDIR) -for information about using .netrc. +for information about using *.netrc*. ## Password Escaping {#auth_userpwdescape} @@ -252,12 +235,12 @@ using the _https_ protocol (note the use of _https_ instead of _http_). the client back to the SOI to actually obtain the data. It turns out that libcurl, by default, uses the password in the -`.daprc` file (or from the url) for all connections that request +`.ncrc` file (or from the url) for all connections that request a password. This causes problems because only the the specific redirected connection is the one that actually requires the password. This is where the `.netrc` file comes in. Libcurl will use `.netrc` for the redirected connection. It is possible to cause libcurl -to use the `.daprc` password always, but this introduces a +to use the `.ncrc` password always, but this introduces a security hole because it may send the initial user+pwd to every server in the redirection chain. In summary, if you are using redirection, then you are @@ -274,9 +257,9 @@ which the client is redirected for authorization, and the login and password are those needed to authenticate on that machine. The location of the `.netrc` file can be specified by -putting the following line in your `.daprc`/`.dodsrc` file. +putting the following line in your `.ncrc`/`.dodsrc` file. 
- HTTP.NETRC= + HTTP.NETRC= If not specified, then libcurl will look first in the current directory, and then in the HOME directory. @@ -286,6 +269,22 @@ to specify a real file in the file system to act as the cookie jar file (HTTP.COOKIEJAR) so that the redirect site can properly pass back authorization information. +### Accessing *earthdata.nasa.gov* + +Since it is so common, here is a set of templates to use to +access *earthdata.nasa.gov*. + +#### *.ncrc* File +```` +HTTP.NETRC=/home//.netrc +HTTP.COOKIEJAR=/home//.urs_cookies +```` + +#### *.netrc* File +```` +machine urs.earthdata.nasa.gov login password +```` + ## Client-Side Certificates {#auth_clientcerts} Some systems, notably ESG (Earth System Grid), requires diff --git a/libdispatch/drc.c b/libdispatch/drc.c index 6c5bc4607..6d4f7487d 100644 --- a/libdispatch/drc.c +++ b/libdispatch/drc.c @@ -26,10 +26,6 @@ See COPYRIGHT for license information. #include "nc4internal.h" #include "ncdispatch.h" -#ifndef nulldup - #define nulldup(x) ((x)?strdup(x):(x)) -#endif - #undef NOREAD #undef DRCDEBUG @@ -66,7 +62,7 @@ static void freeprofile(struct AWSprofile* profile); static void freeprofilelist(NClist* profiles); /* Define default rc files and aliases, also defines load order*/ -static const char* rcfilenames[] = {".ncrc", ".daprc", ".dodsrc",NULL}; +static const char* rcfilenames[] = {".ncrc", ".daprc", ".dodsrc", NULL}; /* Read these files in order and later overriding earlier */ static const char* awsconfigfiles[] = {".aws/config",".aws/credentials",NULL}; @@ -97,9 +93,11 @@ nc_rc_get(const char* key) ncg = NC_getglobalstate(); assert(ncg != NULL && ncg->rcinfo != NULL && ncg->rcinfo->entries != NULL); - if(ncg->rcinfo->ignore) return NC_NOERR; + if(ncg->rcinfo->ignore) goto done; value = NC_rclookup(key,NULL,NULL); - return nulldup(value); +done: + value = nulldup(value); + return value; } /** @@ -121,8 +119,9 @@ nc_rc_set(const char* key, const char* value) ncg = NC_getglobalstate(); assert(ncg != NULL && 
ncg->rcinfo != NULL && ncg->rcinfo->entries != NULL); - if(ncg->rcinfo->ignore) return NC_NOERR; + if(ncg->rcinfo->ignore) goto done;; stat = NC_rcfile_insert(key,NULL,NULL,value); +done: return stat; } @@ -132,7 +131,6 @@ nc_rc_set(const char* key, const char* value) /* Initialize defaults and load: * .ncrc -* .daprc * .dodsrc * ${HOME}/.aws/config * ${HOME}/.aws/credentials @@ -235,13 +233,11 @@ NC_rcload(void) /* locate the configuration files in order of use: 1. Specified by NCRCENV_RC environment variable. 2. If NCRCENV_RC is not set then merge the set of rc files in this order: - 1. $RCHOME/.ncrc - 2. $RCHOME/.daprc - 3. $RCHOME/.docsrc - 4. $CWD/.ncrc - 5. $CWD/.daprc - 6. $CWD/.docsrc - Entry in later files override any of the earlier files + 1. $HOME/.ncrc + 2. $HOME/.dodsrc + 3. $CWD/.ncrc + 4. $CWD/.dodsrc + Entries in later files override any of the earlier files */ if(globalstate->rcinfo->rcfile != NULL) { /* always use this */ nclistpush(rcfileorder,strdup(globalstate->rcinfo->rcfile)); diff --git a/libdispatch/ds3util.c b/libdispatch/ds3util.c index 2f769dd3d..a018fc297 100644 --- a/libdispatch/ds3util.c +++ b/libdispatch/ds3util.c @@ -150,10 +150,10 @@ NC_s3urlrebuild(NCURI* url, char** inoutbucketp, char** inoutregionp, NCURI** ne /* Construct the revised path */ ncbytesclear(buf); - ncbytescat(buf,"/"); - if(bucket == NULL) - {stat = NC_EURL; goto done;} - ncbytescat(buf,bucket); + if(bucket != NULL) { + ncbytescat(buf,"/"); + ncbytescat(buf,bucket); + } for(i=0;i> $HOME/.ncrc + echo "ncrc=ncrc1" >> $RCHOME/.ncrc echo "ncrcx=ncrcx" >> $RCHOME/.ncrc echo "ncrc=ncrc2" >> $RCHOME/.dodsrc echo "daprc=daprc" >> $RCHOME/.daprc diff --git a/unit_test/Makefile.am b/unit_test/Makefile.am index 9948bab57..c159af91c 100644 --- a/unit_test/Makefile.am +++ b/unit_test/Makefile.am @@ -45,7 +45,7 @@ TESTS += run_s3sdk.sh endif endif -EXTRA_DIST = CMakeLists.txt run_s3sdk.sh run_reclaim.sh +EXTRA_DIST = CMakeLists.txt run_s3sdk.sh run_reclaim_tests.sh EXTRA_DIST 
+= nctest_netcdf4_classic.nc reclaim_tests.cdl CLEANFILES = reclaim_tests*.txt reclaim_tests.nc