urlapi: fix parsing URL without slash with CURLU_URLENCODE

When CURLU_URLENCODE is set, the parser would mistreat the path
component if the URL was specified without a slash like in
http://local.test:80?-123

Extended test 1560 to reproduce and verify the fix.

Reported-by: Trail of Bits

Closes #9763
This commit is contained in:
Daniel Stenberg 2022-10-18 15:54:06 +02:00
parent 81094cb492
commit 7d6cf06f57
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2
2 changed files with 65 additions and 30 deletions

View File

@ -116,26 +116,26 @@ static const char *find_host_sep(const char *url)
}
/*
* Decide in an encoding-independent manner whether a character in a
* URL must be escaped. The same criterion must be used in strlen_url()
* and strcpy_url().
* Decide in an encoding-independent manner whether a character in a URL must
* be escaped. This is used in urlencode_str().
*/
static bool urlchar_needs_escaping(int c)
{
return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
}
/* strcpy_url() creates a url in an output dynbuf and URL-encodes the spaces
* in the source URL accordingly.
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
* spaces in the source URL accordingly.
*
* URL encoding should be skipped for host names, otherwise IDN resolution
* will fail.
*
*/
static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
size_t len, bool relative,
bool query)
{
/* we must add this with whitespace-replacing */
bool left = TRUE;
bool left = !query;
const unsigned char *iptr;
const unsigned char *host_sep = (const unsigned char *) url;
@ -143,8 +143,7 @@ static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
host_sep = (const unsigned char *) find_host_sep(url);
for(iptr = (unsigned char *)url; /* read from here */
*iptr; /* until zero byte */
iptr++) {
len; iptr++, len--) {
if(iptr < host_sep) {
if(Curl_dyn_addn(o, iptr, 1))
@ -361,7 +360,7 @@ static char *concat_url(char *base, const char *relurl)
}
/* then append the new piece on the right side */
strcpy_url(&newest, useurl, !host_changed);
urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
return Curl_dyn_ptr(&newest);
}
@ -1130,16 +1129,6 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
}
}
if(*path && (flags & CURLU_URLENCODE)) {
struct dynbuf enc;
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
if(strcpy_url(&enc, path, TRUE)) { /* consider it relative */
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
path = u->path = Curl_dyn_ptr(&enc);
}
fragment = strchr(path, '#');
if(fragment) {
fraglen = strlen(fragment);
@ -1163,12 +1152,25 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
size_t qlen = strlen(query) - fraglen; /* includes '?' */
pathlen = strlen(path) - qlen - fraglen;
if(qlen > 1) {
u->query = Curl_memdup(query + 1, qlen);
if(!u->query) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
if(qlen && (flags & CURLU_URLENCODE)) {
struct dynbuf enc;
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
/* skip the leading question mark */
if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
qlen = Curl_dyn_len(&enc);
query = u->query = Curl_dyn_ptr(&enc);
}
else {
u->query = Curl_memdup(query + 1, qlen);
if(!u->query) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
u->query[qlen - 1] = 0;
}
u->query[qlen - 1] = 0;
if(junkscan(u->query, flags)) {
result = CURLUE_BAD_QUERY;
@ -1187,6 +1189,17 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
else
pathlen = strlen(path) - fraglen;
if(pathlen && (flags & CURLU_URLENCODE)) {
struct dynbuf enc;
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
pathlen = Curl_dyn_len(&enc);
path = u->path = Curl_dyn_ptr(&enc);
}
if(!pathlen) {
/* there is no path left, unset */
path = NULL;
@ -1563,13 +1576,15 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
break;
}
if(ptr) {
*part = strdup(ptr);
size_t partlen = strlen(ptr);
size_t i = 0;
*part = Curl_memdup(ptr, partlen + 1);
if(!*part)
return CURLUE_OUT_OF_MEMORY;
if(plusdecode) {
/* convert + to space */
char *plus;
for(plus = *part; *plus; ++plus) {
char *plus = *part;
for(i = 0; i < partlen; ++plus, i++) {
if(*plus == '+')
*plus = ' ';
}
@ -1586,11 +1601,13 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
return CURLUE_URLDECODE;
}
*part = decoded;
partlen = dlen;
}
if(urlencode) {
struct dynbuf enc;
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
if(strcpy_url(&enc, *part, TRUE)) /* consider it relative */
if(urlencode_str(&enc, *part, partlen, TRUE,
what == CURLUPART_QUERY))
return CURLUE_OUT_OF_MEMORY;
free(*part);
*part = Curl_dyn_ptr(&enc);

View File

@ -138,6 +138,12 @@ struct clearurlcase {
};
static const struct testcase get_parts_list[] ={
{"https://user@example.net?he l lo",
"https | user | [12] | [13] | example.net | [15] | / | he+l+lo | [17]",
CURLU_ALLOW_SPACE, CURLU_URLENCODE, CURLUE_OK},
{"https://user@example.net?he l lo",
"https | user | [12] | [13] | example.net | [15] | / | he l lo | [17]",
CURLU_ALLOW_SPACE, 0, CURLUE_OK},
{"https://exam{}[]ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
{"https://exam{ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
{"https://exam}ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
@ -849,6 +855,18 @@ static CURLUcode updateurl(CURLU *u, const char *cmd, unsigned int setflags)
}
static const struct redircase set_url_list[] = {
{"http://example.com/please/../gimme/%TESTNUMBER?foobar#hello",
"http://example.net/there/it/is/../../tes t case=/%TESTNUMBER0002? yes no",
"http://example.net/there/tes%20t%20case=/%TESTNUMBER0002?+yes+no",
0, CURLU_URLENCODE|CURLU_ALLOW_SPACE, CURLUE_OK},
{"http://local.test?redirect=http://local.test:80?-321",
"http://local.test:80?-123",
"http://local.test:80/?-123",
0, CURLU_URLENCODE|CURLU_ALLOW_SPACE, CURLUE_OK},
{"http://local.test?redirect=http://local.test:80?-321",
"http://local.test:80?-123",
"http://local.test:80/?-123",
0, 0, CURLUE_OK},
{"http://example.org/static/favicon/wikipedia.ico",
"//fake.example.com/licenses/by-sa/3.0/",
"http://fake.example.com/licenses/by-sa/3.0/",