mirror of
https://github.com/curl/curl.git
synced 2025-03-01 15:15:34 +08:00
urlapi: fix parsing URL without slash with CURLU_URLENCODE
When CURLU_URLENCODE is set, the parser would mistreat the path component if the URL was specified without a slash, as in http://local.test:80?-123.
Extended test 1560 to reproduce and verify the fix.
Reported-by: Trail of Bits
Closes #9763
This commit is contained in:
parent
81094cb492
commit
7d6cf06f57
77
lib/urlapi.c
77
lib/urlapi.c
@ -116,26 +116,26 @@ static const char *find_host_sep(const char *url)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Decide in an encoding-independent manner whether a character in a
|
* Decide in an encoding-independent manner whether a character in a URL must
|
||||||
* URL must be escaped. The same criterion must be used in strlen_url()
|
* be escaped. This is used in urlencode_str().
|
||||||
* and strcpy_url().
|
|
||||||
*/
|
*/
|
||||||
static bool urlchar_needs_escaping(int c)
|
static bool urlchar_needs_escaping(int c)
|
||||||
{
|
{
|
||||||
return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
|
return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* strcpy_url() creates a url in an output dynbuf and URL-encodes the spaces
|
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
|
||||||
* in the source URL accordingly.
|
* spaces in the source URL accordingly.
|
||||||
*
|
*
|
||||||
* URL encoding should be skipped for host names, otherwise IDN resolution
|
* URL encoding should be skipped for host names, otherwise IDN resolution
|
||||||
* will fail.
|
* will fail.
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
|
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
|
||||||
|
size_t len, bool relative,
|
||||||
|
bool query)
|
||||||
{
|
{
|
||||||
/* we must add this with whitespace-replacing */
|
/* we must add this with whitespace-replacing */
|
||||||
bool left = TRUE;
|
bool left = !query;
|
||||||
const unsigned char *iptr;
|
const unsigned char *iptr;
|
||||||
const unsigned char *host_sep = (const unsigned char *) url;
|
const unsigned char *host_sep = (const unsigned char *) url;
|
||||||
|
|
||||||
@ -143,8 +143,7 @@ static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
|
|||||||
host_sep = (const unsigned char *) find_host_sep(url);
|
host_sep = (const unsigned char *) find_host_sep(url);
|
||||||
|
|
||||||
for(iptr = (unsigned char *)url; /* read from here */
|
for(iptr = (unsigned char *)url; /* read from here */
|
||||||
*iptr; /* until zero byte */
|
len; iptr++, len--) {
|
||||||
iptr++) {
|
|
||||||
|
|
||||||
if(iptr < host_sep) {
|
if(iptr < host_sep) {
|
||||||
if(Curl_dyn_addn(o, iptr, 1))
|
if(Curl_dyn_addn(o, iptr, 1))
|
||||||
@ -361,7 +360,7 @@ static char *concat_url(char *base, const char *relurl)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* then append the new piece on the right side */
|
/* then append the new piece on the right side */
|
||||||
strcpy_url(&newest, useurl, !host_changed);
|
urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
|
||||||
|
|
||||||
return Curl_dyn_ptr(&newest);
|
return Curl_dyn_ptr(&newest);
|
||||||
}
|
}
|
||||||
@ -1130,16 +1129,6 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(*path && (flags & CURLU_URLENCODE)) {
|
|
||||||
struct dynbuf enc;
|
|
||||||
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
|
|
||||||
if(strcpy_url(&enc, path, TRUE)) { /* consider it relative */
|
|
||||||
result = CURLUE_OUT_OF_MEMORY;
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
path = u->path = Curl_dyn_ptr(&enc);
|
|
||||||
}
|
|
||||||
|
|
||||||
fragment = strchr(path, '#');
|
fragment = strchr(path, '#');
|
||||||
if(fragment) {
|
if(fragment) {
|
||||||
fraglen = strlen(fragment);
|
fraglen = strlen(fragment);
|
||||||
@ -1163,12 +1152,25 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
|||||||
size_t qlen = strlen(query) - fraglen; /* includes '?' */
|
size_t qlen = strlen(query) - fraglen; /* includes '?' */
|
||||||
pathlen = strlen(path) - qlen - fraglen;
|
pathlen = strlen(path) - qlen - fraglen;
|
||||||
if(qlen > 1) {
|
if(qlen > 1) {
|
||||||
u->query = Curl_memdup(query + 1, qlen);
|
if(qlen && (flags & CURLU_URLENCODE)) {
|
||||||
if(!u->query) {
|
struct dynbuf enc;
|
||||||
result = CURLUE_OUT_OF_MEMORY;
|
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
|
||||||
goto fail;
|
/* skip the leading question mark */
|
||||||
|
if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
|
||||||
|
result = CURLUE_OUT_OF_MEMORY;
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
qlen = Curl_dyn_len(&enc);
|
||||||
|
query = u->query = Curl_dyn_ptr(&enc);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
u->query = Curl_memdup(query + 1, qlen);
|
||||||
|
if(!u->query) {
|
||||||
|
result = CURLUE_OUT_OF_MEMORY;
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
u->query[qlen - 1] = 0;
|
||||||
}
|
}
|
||||||
u->query[qlen - 1] = 0;
|
|
||||||
|
|
||||||
if(junkscan(u->query, flags)) {
|
if(junkscan(u->query, flags)) {
|
||||||
result = CURLUE_BAD_QUERY;
|
result = CURLUE_BAD_QUERY;
|
||||||
@ -1187,6 +1189,17 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
|||||||
else
|
else
|
||||||
pathlen = strlen(path) - fraglen;
|
pathlen = strlen(path) - fraglen;
|
||||||
|
|
||||||
|
if(pathlen && (flags & CURLU_URLENCODE)) {
|
||||||
|
struct dynbuf enc;
|
||||||
|
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
|
||||||
|
if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
|
||||||
|
result = CURLUE_OUT_OF_MEMORY;
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
pathlen = Curl_dyn_len(&enc);
|
||||||
|
path = u->path = Curl_dyn_ptr(&enc);
|
||||||
|
}
|
||||||
|
|
||||||
if(!pathlen) {
|
if(!pathlen) {
|
||||||
/* there is no path left, unset */
|
/* there is no path left, unset */
|
||||||
path = NULL;
|
path = NULL;
|
||||||
@ -1563,13 +1576,15 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if(ptr) {
|
if(ptr) {
|
||||||
*part = strdup(ptr);
|
size_t partlen = strlen(ptr);
|
||||||
|
size_t i = 0;
|
||||||
|
*part = Curl_memdup(ptr, partlen + 1);
|
||||||
if(!*part)
|
if(!*part)
|
||||||
return CURLUE_OUT_OF_MEMORY;
|
return CURLUE_OUT_OF_MEMORY;
|
||||||
if(plusdecode) {
|
if(plusdecode) {
|
||||||
/* convert + to space */
|
/* convert + to space */
|
||||||
char *plus;
|
char *plus = *part;
|
||||||
for(plus = *part; *plus; ++plus) {
|
for(i = 0; i < partlen; ++plus, i++) {
|
||||||
if(*plus == '+')
|
if(*plus == '+')
|
||||||
*plus = ' ';
|
*plus = ' ';
|
||||||
}
|
}
|
||||||
@ -1586,11 +1601,13 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
|
|||||||
return CURLUE_URLDECODE;
|
return CURLUE_URLDECODE;
|
||||||
}
|
}
|
||||||
*part = decoded;
|
*part = decoded;
|
||||||
|
partlen = dlen;
|
||||||
}
|
}
|
||||||
if(urlencode) {
|
if(urlencode) {
|
||||||
struct dynbuf enc;
|
struct dynbuf enc;
|
||||||
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
|
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
|
||||||
if(strcpy_url(&enc, *part, TRUE)) /* consider it relative */
|
if(urlencode_str(&enc, *part, partlen, TRUE,
|
||||||
|
what == CURLUPART_QUERY))
|
||||||
return CURLUE_OUT_OF_MEMORY;
|
return CURLUE_OUT_OF_MEMORY;
|
||||||
free(*part);
|
free(*part);
|
||||||
*part = Curl_dyn_ptr(&enc);
|
*part = Curl_dyn_ptr(&enc);
|
||||||
|
@ -138,6 +138,12 @@ struct clearurlcase {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const struct testcase get_parts_list[] ={
|
static const struct testcase get_parts_list[] ={
|
||||||
|
{"https://user@example.net?he l lo",
|
||||||
|
"https | user | [12] | [13] | example.net | [15] | / | he+l+lo | [17]",
|
||||||
|
CURLU_ALLOW_SPACE, CURLU_URLENCODE, CURLUE_OK},
|
||||||
|
{"https://user@example.net?he l lo",
|
||||||
|
"https | user | [12] | [13] | example.net | [15] | / | he l lo | [17]",
|
||||||
|
CURLU_ALLOW_SPACE, 0, CURLUE_OK},
|
||||||
{"https://exam{}[]ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
|
{"https://exam{}[]ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
|
||||||
{"https://exam{ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
|
{"https://exam{ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
|
||||||
{"https://exam}ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
|
{"https://exam}ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
|
||||||
@ -849,6 +855,18 @@ static CURLUcode updateurl(CURLU *u, const char *cmd, unsigned int setflags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static const struct redircase set_url_list[] = {
|
static const struct redircase set_url_list[] = {
|
||||||
|
{"http://example.com/please/../gimme/%TESTNUMBER?foobar#hello",
|
||||||
|
"http://example.net/there/it/is/../../tes t case=/%TESTNUMBER0002? yes no",
|
||||||
|
"http://example.net/there/tes%20t%20case=/%TESTNUMBER0002?+yes+no",
|
||||||
|
0, CURLU_URLENCODE|CURLU_ALLOW_SPACE, CURLUE_OK},
|
||||||
|
{"http://local.test?redirect=http://local.test:80?-321",
|
||||||
|
"http://local.test:80?-123",
|
||||||
|
"http://local.test:80/?-123",
|
||||||
|
0, CURLU_URLENCODE|CURLU_ALLOW_SPACE, CURLUE_OK},
|
||||||
|
{"http://local.test?redirect=http://local.test:80?-321",
|
||||||
|
"http://local.test:80?-123",
|
||||||
|
"http://local.test:80/?-123",
|
||||||
|
0, 0, CURLUE_OK},
|
||||||
{"http://example.org/static/favicon/wikipedia.ico",
|
{"http://example.org/static/favicon/wikipedia.ico",
|
||||||
"//fake.example.com/licenses/by-sa/3.0/",
|
"//fake.example.com/licenses/by-sa/3.0/",
|
||||||
"http://fake.example.com/licenses/by-sa/3.0/",
|
"http://fake.example.com/licenses/by-sa/3.0/",
|
||||||
|
Loading…
Reference in New Issue
Block a user