mirror of
https://github.com/curl/curl.git
synced 2024-12-15 06:40:09 +08:00
urlapi: fix parsing URL without slash with CURLU_URLENCODE
When CURLU_URLENCODE is set, the parser would mistreat the path component if the URL was specified without a slash like in http://local.test:80?-123 Extended test 1560 to reproduce and verify the fix. Reported-by: Trail of Bits Closes #9763
This commit is contained in:
parent
81094cb492
commit
7d6cf06f57
77
lib/urlapi.c
77
lib/urlapi.c
@ -116,26 +116,26 @@ static const char *find_host_sep(const char *url)
|
||||
}
|
||||
|
||||
/*
|
||||
* Decide in an encoding-independent manner whether a character in a
|
||||
* URL must be escaped. The same criterion must be used in strlen_url()
|
||||
* and strcpy_url().
|
||||
* Decide in an encoding-independent manner whether a character in a URL must
|
||||
* be escaped. This is used in urlencode_str().
|
||||
*/
|
||||
static bool urlchar_needs_escaping(int c)
|
||||
{
|
||||
return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
|
||||
}
|
||||
|
||||
/* strcpy_url() creates a url in an output dynbuf and URL-encodes the spaces
|
||||
* in the source URL accordingly.
|
||||
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
|
||||
* spaces in the source URL accordingly.
|
||||
*
|
||||
* URL encoding should be skipped for host names, otherwise IDN resolution
|
||||
* will fail.
|
||||
*
|
||||
*/
|
||||
static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
|
||||
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
|
||||
size_t len, bool relative,
|
||||
bool query)
|
||||
{
|
||||
/* we must add this with whitespace-replacing */
|
||||
bool left = TRUE;
|
||||
bool left = !query;
|
||||
const unsigned char *iptr;
|
||||
const unsigned char *host_sep = (const unsigned char *) url;
|
||||
|
||||
@ -143,8 +143,7 @@ static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
|
||||
host_sep = (const unsigned char *) find_host_sep(url);
|
||||
|
||||
for(iptr = (unsigned char *)url; /* read from here */
|
||||
*iptr; /* until zero byte */
|
||||
iptr++) {
|
||||
len; iptr++, len--) {
|
||||
|
||||
if(iptr < host_sep) {
|
||||
if(Curl_dyn_addn(o, iptr, 1))
|
||||
@ -361,7 +360,7 @@ static char *concat_url(char *base, const char *relurl)
|
||||
}
|
||||
|
||||
/* then append the new piece on the right side */
|
||||
strcpy_url(&newest, useurl, !host_changed);
|
||||
urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
|
||||
|
||||
return Curl_dyn_ptr(&newest);
|
||||
}
|
||||
@ -1130,16 +1129,6 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
||||
}
|
||||
}
|
||||
|
||||
if(*path && (flags & CURLU_URLENCODE)) {
|
||||
struct dynbuf enc;
|
||||
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
|
||||
if(strcpy_url(&enc, path, TRUE)) { /* consider it relative */
|
||||
result = CURLUE_OUT_OF_MEMORY;
|
||||
goto fail;
|
||||
}
|
||||
path = u->path = Curl_dyn_ptr(&enc);
|
||||
}
|
||||
|
||||
fragment = strchr(path, '#');
|
||||
if(fragment) {
|
||||
fraglen = strlen(fragment);
|
||||
@ -1163,12 +1152,25 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
||||
size_t qlen = strlen(query) - fraglen; /* includes '?' */
|
||||
pathlen = strlen(path) - qlen - fraglen;
|
||||
if(qlen > 1) {
|
||||
u->query = Curl_memdup(query + 1, qlen);
|
||||
if(!u->query) {
|
||||
result = CURLUE_OUT_OF_MEMORY;
|
||||
goto fail;
|
||||
if(qlen && (flags & CURLU_URLENCODE)) {
|
||||
struct dynbuf enc;
|
||||
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
|
||||
/* skip the leading question mark */
|
||||
if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
|
||||
result = CURLUE_OUT_OF_MEMORY;
|
||||
goto fail;
|
||||
}
|
||||
qlen = Curl_dyn_len(&enc);
|
||||
query = u->query = Curl_dyn_ptr(&enc);
|
||||
}
|
||||
else {
|
||||
u->query = Curl_memdup(query + 1, qlen);
|
||||
if(!u->query) {
|
||||
result = CURLUE_OUT_OF_MEMORY;
|
||||
goto fail;
|
||||
}
|
||||
u->query[qlen - 1] = 0;
|
||||
}
|
||||
u->query[qlen - 1] = 0;
|
||||
|
||||
if(junkscan(u->query, flags)) {
|
||||
result = CURLUE_BAD_QUERY;
|
||||
@ -1187,6 +1189,17 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
||||
else
|
||||
pathlen = strlen(path) - fraglen;
|
||||
|
||||
if(pathlen && (flags & CURLU_URLENCODE)) {
|
||||
struct dynbuf enc;
|
||||
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
|
||||
if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
|
||||
result = CURLUE_OUT_OF_MEMORY;
|
||||
goto fail;
|
||||
}
|
||||
pathlen = Curl_dyn_len(&enc);
|
||||
path = u->path = Curl_dyn_ptr(&enc);
|
||||
}
|
||||
|
||||
if(!pathlen) {
|
||||
/* there is no path left, unset */
|
||||
path = NULL;
|
||||
@ -1563,13 +1576,15 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
|
||||
break;
|
||||
}
|
||||
if(ptr) {
|
||||
*part = strdup(ptr);
|
||||
size_t partlen = strlen(ptr);
|
||||
size_t i = 0;
|
||||
*part = Curl_memdup(ptr, partlen + 1);
|
||||
if(!*part)
|
||||
return CURLUE_OUT_OF_MEMORY;
|
||||
if(plusdecode) {
|
||||
/* convert + to space */
|
||||
char *plus;
|
||||
for(plus = *part; *plus; ++plus) {
|
||||
char *plus = *part;
|
||||
for(i = 0; i < partlen; ++plus, i++) {
|
||||
if(*plus == '+')
|
||||
*plus = ' ';
|
||||
}
|
||||
@ -1586,11 +1601,13 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
|
||||
return CURLUE_URLDECODE;
|
||||
}
|
||||
*part = decoded;
|
||||
partlen = dlen;
|
||||
}
|
||||
if(urlencode) {
|
||||
struct dynbuf enc;
|
||||
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
|
||||
if(strcpy_url(&enc, *part, TRUE)) /* consider it relative */
|
||||
if(urlencode_str(&enc, *part, partlen, TRUE,
|
||||
what == CURLUPART_QUERY))
|
||||
return CURLUE_OUT_OF_MEMORY;
|
||||
free(*part);
|
||||
*part = Curl_dyn_ptr(&enc);
|
||||
|
@ -138,6 +138,12 @@ struct clearurlcase {
|
||||
};
|
||||
|
||||
static const struct testcase get_parts_list[] ={
|
||||
{"https://user@example.net?he l lo",
|
||||
"https | user | [12] | [13] | example.net | [15] | / | he+l+lo | [17]",
|
||||
CURLU_ALLOW_SPACE, CURLU_URLENCODE, CURLUE_OK},
|
||||
{"https://user@example.net?he l lo",
|
||||
"https | user | [12] | [13] | example.net | [15] | / | he l lo | [17]",
|
||||
CURLU_ALLOW_SPACE, 0, CURLUE_OK},
|
||||
{"https://exam{}[]ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
|
||||
{"https://exam{ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
|
||||
{"https://exam}ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
|
||||
@ -849,6 +855,18 @@ static CURLUcode updateurl(CURLU *u, const char *cmd, unsigned int setflags)
|
||||
}
|
||||
|
||||
static const struct redircase set_url_list[] = {
|
||||
{"http://example.com/please/../gimme/%TESTNUMBER?foobar#hello",
|
||||
"http://example.net/there/it/is/../../tes t case=/%TESTNUMBER0002? yes no",
|
||||
"http://example.net/there/tes%20t%20case=/%TESTNUMBER0002?+yes+no",
|
||||
0, CURLU_URLENCODE|CURLU_ALLOW_SPACE, CURLUE_OK},
|
||||
{"http://local.test?redirect=http://local.test:80?-321",
|
||||
"http://local.test:80?-123",
|
||||
"http://local.test:80/?-123",
|
||||
0, CURLU_URLENCODE|CURLU_ALLOW_SPACE, CURLUE_OK},
|
||||
{"http://local.test?redirect=http://local.test:80?-321",
|
||||
"http://local.test:80?-123",
|
||||
"http://local.test:80/?-123",
|
||||
0, 0, CURLUE_OK},
|
||||
{"http://example.org/static/favicon/wikipedia.ico",
|
||||
"//fake.example.com/licenses/by-sa/3.0/",
|
||||
"http://fake.example.com/licenses/by-sa/3.0/",
|
||||
|
Loading…
Reference in New Issue
Block a user