urlapi: detect scheme better when not guessing

When the parser is not allowed to guess the scheme, it should consider the
word ending at the first colon to be the scheme, independently of the
number of slashes.

The parser now checks that the scheme is known before it counts slashes,
to improve the error message for URLs with unknown schemes and possibly no
slashes.

When following redirects, no scheme guessing is allowed and therefore
this change effectively prevents redirects to unknown schemes such as
"data".

Fixes #9503
This commit is contained in:
Daniel Stenberg 2022-09-14 09:18:30 +02:00
parent 7f5fe74323
commit 846678541b
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2
4 changed files with 30 additions and 14 deletions

View File

@ -1637,7 +1637,7 @@ CURLcode Curl_follow(struct Curl_easy *data,
if((type != FOLLOW_RETRY) &&
(data->req.httpcode != 401) && (data->req.httpcode != 407) &&
Curl_is_absolute_url(newurl, NULL, 0))
Curl_is_absolute_url(newurl, NULL, 0, FALSE))
/* If this is not redirect due to a 401 or 407 response and an absolute
URL: don't allow a custom port number */
disallowport = TRUE;
@ -1649,8 +1649,11 @@ CURLcode Curl_follow(struct Curl_easy *data,
CURLU_ALLOW_SPACE |
(data->set.path_as_is ? CURLU_PATH_AS_IS : 0));
if(uc) {
if(type != FOLLOW_FAKE)
if(type != FOLLOW_FAKE) {
failf(data, "The redirect target URL could not be parsed: %s",
curl_url_strerror(uc));
return Curl_uc_to_curlcode(uc);
}
/* the URL could not be parsed for some reason, but since this is FAKE
mode, just duplicate the field as-is */

View File

@ -1995,7 +1995,7 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data,
return CURLE_OUT_OF_MEMORY;
if(data->set.str[STRING_DEFAULT_PROTOCOL] &&
!Curl_is_absolute_url(data->state.url, NULL, 0)) {
!Curl_is_absolute_url(data->state.url, NULL, 0, TRUE)) {
char *url = aprintf("%s://%s", data->set.str[STRING_DEFAULT_PROTOCOL],
data->state.url);
if(!url)

View File

@ -25,7 +25,8 @@
***************************************************************************/
#include "curl_setup.h"
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen);
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
bool guess_scheme);
#ifdef DEBUGBUILD
CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,

View File

@ -184,8 +184,12 @@ static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
* Returns the length of the scheme if the given URL is absolute (as opposed
* to relative). Stores the scheme in the buffer if the URL is absolute and
* 'buf' is non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is
* set.
*
* If 'guess_scheme' is TRUE, it means the URL might be provided without
* scheme.
*/
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
bool guess_scheme)
{
int i;
DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
@ -193,7 +197,7 @@ size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
if(buf)
buf[0] = 0; /* always leave a defined value in buf */
#ifdef WIN32
if(STARTS_WITH_DRIVE_PREFIX(url))
if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
return 0;
#endif
for(i = 0; i < MAX_SCHEME_LEN; ++i) {
@ -207,7 +211,11 @@ size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
break;
}
}
if(i && (url[i] == ':') && (url[i + 1] == '/')) {
if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
/* If this does not guess scheme, the scheme always ends with the colon so
that this also detects data: URLs etc. In guessing mode, data: could
be the host name "data" with a specified port number. */
/* the length of the scheme is the name part only */
size_t len = i;
if(buf) {
@ -934,7 +942,9 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
goto fail;
}
schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf));
schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
flags & (CURLU_GUESS_SCHEME|
CURLU_DEFAULT_SCHEME));
/* handle the file: scheme */
if(schemelen && !strcmp(schemebuf, "file")) {
@ -1059,11 +1069,6 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
p++;
i++;
}
if((i < 1) || (i>3)) {
/* less than one or more than three slashes */
result = CURLUE_BAD_SLASHES;
goto fail;
}
schemep = schemebuf;
if(!Curl_builtin_scheme(schemep) &&
@ -1072,6 +1077,11 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
goto fail;
}
if((i < 1) || (i>3)) {
/* less than one or more than three slashes */
result = CURLUE_BAD_SLASHES;
goto fail;
}
if(junkscan(schemep, flags)) {
result = CURLUE_BAD_SCHEME;
goto fail;
@ -1730,7 +1740,9 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
/* if the new thing is absolute or the old one is not
* (we could not get an absolute url in 'oldurl'),
* then replace the existing with the new. */
if(Curl_is_absolute_url(part, NULL, 0)
if(Curl_is_absolute_url(part, NULL, 0,
flags & (CURLU_GUESS_SCHEME|
CURLU_DEFAULT_SCHEME))
|| curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
return parseurl_and_replace(part, u, flags);
}