urlapi: have *set(PATH) prepend a slash if one is missing

Previously the code would just do that for the path when extracting the
full URL, which made a subsequent curl_url_get() of the path to
(unexpectedly) still return it without the leading path.

Amend lib1560 to verify this. Clarify the curl_url_set() docs about it.

Bug: https://curl.se/mail/lib-2023-06/0015.html
Closes #11272
Reported-by: Pedro Henrique
This commit is contained in:
Daniel Stenberg 2023-06-08 13:15:09 +02:00
parent 7d62f0d9b8
commit 3c9256c8a0
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2
3 changed files with 112 additions and 20 deletions

View File

@ -101,8 +101,8 @@ Port cannot be URL encoded on set. The given port number is provided as a
string and the decimal number must be between 1 and 65535. Anything else will
return an error.
.IP CURLUPART_PATH
If a path is set in the URL without a leading slash, a slash will be inserted
automatically when this URL is read from the handle.
If a path is set in the URL without a leading slash, a slash will be prepended
automatically.
.IP CURLUPART_QUERY
The query part will also get spaces converted to pluses when asked to URL
encode on set with the \fICURLU_URLENCODE\fP bit.

View File

@ -1547,7 +1547,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
}
}
url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
scheme,
u->user ? u->user : "",
u->password ? ":": "",
@ -1558,7 +1558,6 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
allochost ? allochost : u->host,
port ? ":": "",
port ? port : "",
(u->path && (u->path[0] != '/')) ? "/": "",
u->path ? u->path : "/",
(u->query && u->query[0]) ? "?": "",
(u->query && u->query[0]) ? u->query : "",
@ -1640,6 +1639,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
bool plusencode = FALSE;
bool urlskipslash = FALSE;
bool leadingslash = FALSE;
bool appendquery = FALSE;
bool equalsencode = FALSE;
@ -1751,6 +1751,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
break;
case CURLUPART_PATH:
urlskipslash = TRUE;
leadingslash = TRUE; /* enforce */
storep = &u->path;
break;
case CURLUPART_QUERY:
@ -1801,16 +1802,21 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
{
const char *newp = part;
size_t nalloc = strlen(part);
struct dynbuf enc;
if(nalloc > CURL_MAX_INPUT_LENGTH)
/* excessive input length */
return CURLUE_MALFORMED_INPUT;
Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
if(leadingslash && (part[0] != '/')) {
CURLcode result = Curl_dyn_addn(&enc, "/", 1);
if(result)
return CURLUE_OUT_OF_MEMORY;
}
if(urlencode) {
const unsigned char *i;
struct dynbuf enc;
Curl_dyn_init(&enc, nalloc * 3 + 1);
for(i = (const unsigned char *)part; *i; i++) {
CURLcode result;
@ -1838,14 +1844,13 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
return CURLUE_OUT_OF_MEMORY;
}
}
newp = Curl_dyn_ptr(&enc);
}
else {
char *p;
newp = strdup(part);
if(!newp)
CURLcode result = Curl_dyn_add(&enc, part);
if(result)
return CURLUE_OUT_OF_MEMORY;
p = (char *)newp;
p = Curl_dyn_ptr(&enc);
while(*p) {
/* make sure percent encoded are lower case */
if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
@ -1858,6 +1863,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
p++;
}
}
newp = Curl_dyn_ptr(&enc);
if(appendquery) {
/* Append the 'newp' string onto the old query. Add a '&' separator if
@ -1866,24 +1872,24 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
size_t querylen = u->query ? strlen(u->query) : 0;
bool addamperand = querylen && (u->query[querylen -1] != '&');
if(querylen) {
struct dynbuf enc;
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
struct dynbuf qbuf;
Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
goto nomem;
if(addamperand) {
if(Curl_dyn_addn(&enc, "&", 1))
if(Curl_dyn_addn(&qbuf, "&", 1))
goto nomem;
}
if(Curl_dyn_add(&enc, newp))
if(Curl_dyn_add(&qbuf, newp))
goto nomem;
free((char *)newp);
Curl_dyn_free(&enc);
free(*storep);
*storep = Curl_dyn_ptr(&enc);
*storep = Curl_dyn_ptr(&qbuf);
return CURLUE_OK;
nomem:
free((char *)newp);
Curl_dyn_free(&enc);
return CURLUE_OUT_OF_MEMORY;
}
}
@ -1895,7 +1901,7 @@ nomem:
}
else {
if(!n || hostname_check(u, (char *)newp, n)) {
free((char *)newp);
Curl_dyn_free(&enc);
return CURLUE_BAD_HOSTNAME;
}
}

View File

@ -108,6 +108,16 @@ struct setcase {
CURLUcode pcode; /* for updating parts */
};
struct setgetcase {
const char *in;
const char *set;
const char *out;
unsigned int urlflags; /* for setting the URL */
unsigned int setflags; /* for updating parts */
unsigned int getflags; /* for getting parts */
CURLUcode pcode; /* for updating parts */
};
struct testcase {
const char *in;
const char *out;
@ -747,8 +757,33 @@ static int checkurl(const char *org, const char *url, const char *out)
return 0;
}
/* 1. Set the URL
2. Set components
3. Extract all components (not URL)
*/
static const struct setgetcase setget_parts_list[] = {
{"https://example.com",
"path=get,",
"https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]",
0, 0, 0, CURLUE_OK},
{"https://example.com",
"path=/get,",
"https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]",
0, 0, 0, CURLUE_OK},
{"https://example.com",
"path=g e t,",
"https | [11] | [12] | [13] | example.com | [15] | /g%20e%20t | "
"[16] | [17]",
0, CURLU_URLENCODE, 0, CURLUE_OK},
{NULL, NULL, NULL, 0, 0, 0, CURLUE_OK}
};
/* !checksrc! disable SPACEBEFORECOMMA 1 */
static const struct setcase set_parts_list[] = {
{"https://example.com",
"path=get,",
"https://example.com/get",
0, 0, CURLUE_OK, CURLUE_OK},
{"https://example.com/",
"scheme=ftp+-.123,",
"ftp+-.123://example.com/",
@ -1120,6 +1155,54 @@ static int set_url(void)
return error;
}
/* 1. Set a URL
2. Set one or more parts
3. Extract and compare all parts - not the URL
*/
static int setget_parts(void)
{
int i;
int error = 0;
for(i = 0; setget_parts_list[i].set && !error; i++) {
CURLUcode rc;
CURLU *urlp = curl_url();
if(!urlp) {
error++;
break;
}
if(setget_parts_list[i].in)
rc = curl_url_set(urlp, CURLUPART_URL, setget_parts_list[i].in,
setget_parts_list[i].urlflags);
else
rc = CURLUE_OK;
if(!rc) {
char *url = NULL;
CURLUcode uc = updateurl(urlp, setget_parts_list[i].set,
setget_parts_list[i].setflags);
if(uc != setget_parts_list[i].pcode) {
fprintf(stderr, "updateurl\nin: %s\nreturned %d (expected %d)\n",
setget_parts_list[i].set, (int)uc, setget_parts_list[i].pcode);
error++;
}
if(!uc) {
if(checkparts(urlp, setget_parts_list[i].set, setget_parts_list[i].out,
setget_parts_list[i].getflags))
error++; /* add */
}
curl_free(url);
}
else if(rc != CURLUE_OK) {
fprintf(stderr, "Set parts\nin: %s\nreturned %d (expected %d)\n",
setget_parts_list[i].in, (int)rc, 0);
error++;
}
curl_url_cleanup(urlp);
}
return error;
}
static int set_parts(void)
{
int i;
@ -1593,6 +1676,9 @@ int test(char *URL)
{
(void)URL; /* not used */
if(setget_parts())
return 10;
if(get_url())
return 3;