mirror of
https://github.com/curl/curl.git
synced 2025-01-18 14:04:30 +08:00
urlapi: detect scheme better when not guessing
When the parser is not allowed to guess the scheme, it should consider the word ending at the first colon to be the scheme, independently of the number of slashes. The parser now checks that the scheme is known before it counts slashes, to improve the error message for URLs with unknown schemes and maybe no slashes. When following redirects, no scheme guessing is allowed and therefore this change effectively prevents redirects to unknown schemes such as "data". Fixes #9503
This commit is contained in:
parent
7f5fe74323
commit
846678541b
@ -1637,7 +1637,7 @@ CURLcode Curl_follow(struct Curl_easy *data,
|
||||
|
||||
if((type != FOLLOW_RETRY) &&
|
||||
(data->req.httpcode != 401) && (data->req.httpcode != 407) &&
|
||||
Curl_is_absolute_url(newurl, NULL, 0))
|
||||
Curl_is_absolute_url(newurl, NULL, 0, FALSE))
|
||||
/* If this is not redirect due to a 401 or 407 response and an absolute
|
||||
URL: don't allow a custom port number */
|
||||
disallowport = TRUE;
|
||||
@ -1649,8 +1649,11 @@ CURLcode Curl_follow(struct Curl_easy *data,
|
||||
CURLU_ALLOW_SPACE |
|
||||
(data->set.path_as_is ? CURLU_PATH_AS_IS : 0));
|
||||
if(uc) {
|
||||
if(type != FOLLOW_FAKE)
|
||||
if(type != FOLLOW_FAKE) {
|
||||
failf(data, "The redirect target URL could not be parsed: %s",
|
||||
curl_url_strerror(uc));
|
||||
return Curl_uc_to_curlcode(uc);
|
||||
}
|
||||
|
||||
/* the URL could not be parsed for some reason, but since this is FAKE
|
||||
mode, just duplicate the field as-is */
|
||||
|
@ -1995,7 +1995,7 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data,
|
||||
return CURLE_OUT_OF_MEMORY;
|
||||
|
||||
if(data->set.str[STRING_DEFAULT_PROTOCOL] &&
|
||||
!Curl_is_absolute_url(data->state.url, NULL, 0)) {
|
||||
!Curl_is_absolute_url(data->state.url, NULL, 0, TRUE)) {
|
||||
char *url = aprintf("%s://%s", data->set.str[STRING_DEFAULT_PROTOCOL],
|
||||
data->state.url);
|
||||
if(!url)
|
||||
|
@ -25,7 +25,8 @@
|
||||
***************************************************************************/
|
||||
#include "curl_setup.h"
|
||||
|
||||
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen);
|
||||
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
|
||||
bool guess_scheme);
|
||||
|
||||
#ifdef DEBUGBUILD
|
||||
CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
|
||||
|
32
lib/urlapi.c
32
lib/urlapi.c
@ -184,8 +184,12 @@ static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
|
||||
* Returns the length of the scheme if the given URL is absolute (as opposed
|
||||
* to relative). Stores the scheme in the buffer if TRUE and 'buf' is
|
||||
* non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
|
||||
*
|
||||
* If 'guess_scheme' is TRUE, it means the URL might be provided without
|
||||
* scheme.
|
||||
*/
|
||||
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
|
||||
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
|
||||
bool guess_scheme)
|
||||
{
|
||||
int i;
|
||||
DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
|
||||
@ -193,7 +197,7 @@ size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
|
||||
if(buf)
|
||||
buf[0] = 0; /* always leave a defined value in buf */
|
||||
#ifdef WIN32
|
||||
if(STARTS_WITH_DRIVE_PREFIX(url))
|
||||
if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
|
||||
return 0;
|
||||
#endif
|
||||
for(i = 0; i < MAX_SCHEME_LEN; ++i) {
|
||||
@ -207,7 +211,11 @@ size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(i && (url[i] == ':') && (url[i + 1] == '/')) {
|
||||
if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
|
||||
/* If this does not guess scheme, the scheme always ends with the colon so
|
||||
that this also detects data: URLs etc. In guessing mode, data: could
|
||||
be the host name "data" with a specified port number. */
|
||||
|
||||
/* the length of the scheme is the name part only */
|
||||
size_t len = i;
|
||||
if(buf) {
|
||||
@ -934,7 +942,9 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf));
|
||||
schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
|
||||
flags & (CURLU_GUESS_SCHEME|
|
||||
CURLU_DEFAULT_SCHEME));
|
||||
|
||||
/* handle the file: scheme */
|
||||
if(schemelen && !strcmp(schemebuf, "file")) {
|
||||
@ -1059,11 +1069,6 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
||||
p++;
|
||||
i++;
|
||||
}
|
||||
if((i < 1) || (i>3)) {
|
||||
/* less than one or more than three slashes */
|
||||
result = CURLUE_BAD_SLASHES;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
schemep = schemebuf;
|
||||
if(!Curl_builtin_scheme(schemep) &&
|
||||
@ -1072,6 +1077,11 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if((i < 1) || (i>3)) {
|
||||
/* less than one or more than three slashes */
|
||||
result = CURLUE_BAD_SLASHES;
|
||||
goto fail;
|
||||
}
|
||||
if(junkscan(schemep, flags)) {
|
||||
result = CURLUE_BAD_SCHEME;
|
||||
goto fail;
|
||||
@ -1730,7 +1740,9 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
|
||||
/* if the new thing is absolute or the old one is not
|
||||
* (we could not get an absolute url in 'oldurl'),
|
||||
* then replace the existing with the new. */
|
||||
if(Curl_is_absolute_url(part, NULL, 0)
|
||||
if(Curl_is_absolute_url(part, NULL, 0,
|
||||
flags & (CURLU_GUESS_SCHEME|
|
||||
CURLU_DEFAULT_SCHEME))
|
||||
|| curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
|
||||
return parseurl_and_replace(part, u, flags);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user