Merge pull request #2947 from mannreis/main

S3 Mode url reconstruction defaults to wrong server type
This commit is contained in:
Ward Fisher 2024-07-15 16:04:49 -06:00 committed by GitHub
commit 62ab618b66
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 103 additions and 18 deletions

View File

@ -15,7 +15,7 @@
/* Track the server type, if known */
typedef enum NCS3SVC {NCS3UNK=0, /* unknown */
NCS3=1, /* s3.amazon.aws */
NCS3GS=0 /* storage.googleapis.com */
NCS3GS=2 /* storage.googleapis.com */
} NCS3SVC;
typedef struct NCS3INFO {

View File

@ -110,17 +110,24 @@ NC_s3urlrebuild(NCURI* url, NCS3INFO* s3, NCURI** newurlp)
/* split the path by "/" */
if((stat = NC_split_delim(url->path,'/',pathsegments))) goto done;
/* Distinguish path-style from virtual-host style from s3: and from other.
Virtual: https://<bucket-name>.s3.<region>.amazonaws.com/<path> (1)
or: https://<bucket-name>.s3.amazonaws.com/<path> -- region defaults (to us-east-1) (2)
Path: https://s3.<region>.amazonaws.com/<bucket-name>/<path> (3)
or: https://s3.amazonaws.com/<bucket-name>/<path> -- region defaults to us-east-1 (4)
S3: s3://<bucket-name>/<path> (5)
Google: https://storage.googleapis.com/<bucket-name>/<path> (6)
or: gs3://<bucket-name>/<path> (7)
Other: https://<host>/<bucket-name>/<path> (8)
*/
if(url->host == NULL || strlen(url->host) == 0)
/* Distinguish path-style from virtual-host style from s3: and from other.
Virtual:
(1) https://<bucket-name>.s3.<region>.amazonaws.com/<path>
(2) https://<bucket-name>.s3.amazonaws.com/<path> -- region defaults (to us-east-1)
Path:
(3) https://s3.<region>.amazonaws.com/<bucket-name>/<path>
(4) https://s3.amazonaws.com/<bucket-name>/<path> -- region defaults to us-east-1
S3:
(5) s3://<bucket-name>/<path>
Google:
(6) https://storage.googleapis.com/<bucket-name>/<path>
(7) gs3://<bucket-name>/<path>
Other:
(8) https://<host>/<bucket-name>/<path>
(9) https://<bucket-name>.s3.<region>.domain.example.com/<path>
(10)https://s3.<region>.example.com/<bucket>/<path>
*/
if(url->host == NULL || strlen(url->host) == 0)
{stat = NC_EURL; goto done;}
/* Reduce the host to standard form such as s3.amazonaws.com by pulling out the
@ -168,12 +175,21 @@ NC_s3urlrebuild(NCURI* url, NCS3INFO* s3, NCURI** newurlp)
/* region is unknown */
/* bucket is unknown at this point */
svc = NCS3GS;
} else { /* Presume Format (8) */
if((host = strdup(url->host))==NULL)
{stat = NC_ENOMEM; goto done;}
/* region is unknown */
/* bucket is unknown */
}
} else { /* Presume Formats (8),(9),(10) */
if (nclistlength(hostsegments) > 3 && strcasecmp(nclistget(hostsegments, 1), "s3") == 0){
bucket = nclistremove(hostsegments, 0);
region = nclistremove(hostsegments, 2);
host = strdup(url->host + sizeof(bucket) + 1);
}else{
if (nclistlength(hostsegments) > 2 && strcasecmp(nclistget(hostsegments, 0), "s3") == 0){
region = nclistremove(hostsegments, 1);
}
if ((host = strdup(url->host)) == NULL){
stat = NC_ENOMEM;
goto done;
}
}
}
/* region = (1) from url, (2) s3->region, (3) default */
if(region == NULL && s3 != NULL)

View File

@ -14,6 +14,7 @@
# run on Windows.
SET(UNIT_TESTS test_ncuri)
add_bin_test(unit_test test_ncuri)
IF(USE_X_GETOPT)
SET(XGETOPTSRC "${CMAKE_CURRENT_SOURCE_DIR}/../libdispatch/XGetopt.c")

View File

@ -12,6 +12,9 @@ Test the ncuri parsing
#include <string.h>
#include "netcdf.h"
#include "ncuri.h"
#ifdef NETCDF_ENABLE_S3
#include "ncpathmgr.h" // to initialize global state needed by NC_s3urlrebuild
#endif
typedef struct Test {
char* url;
@ -47,6 +50,34 @@ static Test TESTS[] = {
{NULL,NULL}
};
#if defined(NETCDF_ENABLE_S3)
static Test S3TESTS[] = {
//Virtual
{"https://<bucket-name>.s3.<region>.amazonaws.com/<path>","https://s3.<region>.amazonaws.com/<bucket-name>/<path>#mode=s3"},
{"https://<bucket-name>.s3.amazonaws.com/<path>","https://s3.us-east-1.amazonaws.com/<bucket-name>/<path>#mode=s3"},
//Path
{"https://s3.<region>.amazonaws.com/<bucket-name>/<path>","https://s3.<region>.amazonaws.com/<bucket-name>/<path>#mode=s3"},
{"https://s3.amazonaws.com/<bucket-name>/<path>","https://s3.us-east-1.amazonaws.com/<bucket-name>/<path>#mode=s3"},
//s3
{"s3://<bucket-name>/<path>","https://s3.us-east-1.amazonaws.com/<bucket-name>/<path>#mode=s3"},
//Google
{"https://storage.googleapis.com/<bucket-name>/<path>","https://storage.googleapis.com/<bucket-name>/<path>#mode=s3"},
{"gs3://<bucket-name>/<path>","https://storage.googleapis.com/<bucket-name>/<path>#mode=s3"},
//Other
// (8) https://<host>/<bucket-name>/<path>
{"https://<host>/<bucket>/path/2/resource/#mode=s3,zarr", "https://<host>/<bucket>/path/2/resource#mode=s3,zarr"},
// (9) https://<bucket-name>.s3.<region>.domain.example.com/<path>
{"https://<bucket>.s3.<region>.<domain>/path/2/resource/#mode=s3,zarr", "https://s3.<region>.<domain>/<bucket>/path/2/resource#mode=s3,zarr"},
// (10)https://s3.<region>.example.com/<bucket>/<path>
{"https://s3.<region>.example.com/bucket/path/2/resource/#mode=s3,zarr", "https://s3.<region>.example.com/bucket/path/2/resource#mode=s3,zarr"},
{"https://s3.example.com/bucket/path/2/resource/#mode=s3,zarr", "https://s3.example.com/bucket/path/2/resource#mode=s3,zarr"},
{"https://server.example.com/bucket/path/2/resource#mode=s3", "https://server.example.com/bucket/path/2/resource#mode=s3"},
{"https://prefix.<bucket>.s3.localhost.example.cloud/path/2/resource#mode=s3", "https://prefix.<bucket>.s3.localhost.example.cloud/path/2/resource#mode=s3"},
{"https://<bucket>.s3.<region>.localhost/path/2/resource#mode=s3", "https://s3.<region>.localhost/<bucket>/path/2/resource#mode=s3"},
{NULL,NULL}
};
#endif
/* Tests that should fail */
static char* XTESTS[] = {
"[dap4http://localhost:8081/x",
@ -92,6 +123,43 @@ main(int argc, char** argv)
}
}
#ifdef NETCDF_ENABLE_S3
nc_initialize();
for(index=0, test=S3TESTS;test->url;test++,index++) {
int ret = 0;
NCURI* uri = NULL;
ret = ncuriparse(test->url,&uri);
if(ret != NC_NOERR) {
fprintf(stderr,"Parse fail: %s\n",test->url);
failcount++;
} else {
int iss3 = NC_iss3(uri,NULL);
if(iss3 != 0){
NCURI * newuri = NULL;
if(NC_s3urlrebuild(uri,NULL,&newuri)){
fprintf(stderr, "Could not reinterpret url [%d] with s3urlrebuild: %s\n",index,test->url);
fprintf(stderr,"Mismatch: [%d] expected=|%s| actual=|%s|\n",index,test->expected,"NULL");
failcount ++;
}else{
char* built = ncuribuild(newuri,NULL,NULL,NCURIALL);
if(built == NULL) {
fprintf(stderr,"Build fail: %s\n",test->url);
failcount++;
} else {
if(strcmp(test->expected,built) != 0) {
fprintf(stderr,"Mismatch: [%d] expected=|%s| actual=|%s|\n",index,test->expected,built);
failcount++;
}
free(built);
}
}
ncurifree(newuri);
}
}
ncurifree(uri);
}
nc_finalize();
#endif
fprintf(stderr,"%s test_ncuri\n",failcount > 0 ? "***FAIL":"***PASS");
return (failcount > 0 ? 1 : 0);
}