URL parser: allow URLs to use one, two or three slashes

Mostly in order to support broken web sites that redirect to broken URLs
that are accepted by browsers.

Browsers are typically even more lenient than this: according to the WHATWG
URL spec they should allow an _infinite_ amount. I tested 8000 slashes with
Firefox and it just worked.

Added test cases 1141, 1142 and 1143 to verify the new parser.

Closes #791
This commit is contained in:
Daniel Stenberg 2016-05-08 15:11:10 +02:00
parent ed8b8f2456
commit 5409e1d793
5 changed files with 200 additions and 6 deletions

View File

@ -4141,12 +4141,17 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
}
else {
/* clear path */
char slashbuf[4];
path[0]=0;
if(2 > sscanf(data->change.url,
"%15[^\n:]://%[^\n/?]%[^\n]",
protobuf,
conn->host.name, path)) {
rc = sscanf(data->change.url,
"%15[^\n:]:%3[/]%[^\n/?]%[^\n]",
protobuf, slashbuf, conn->host.name, path);
if(2 == rc) {
failf(data, "Bad URL");
return CURLE_URL_MALFORMAT;
}
if(3 > rc) {
/*
* The URL was badly formatted, let's try the browser-style _without_
@ -4197,8 +4202,23 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
*prot_missing = TRUE; /* not given in URL */
}
else
else {
size_t s = strlen(slashbuf);
protop = protobuf;
if(s != 2) {
infof(data, "Unwillingly accepted illegal URL using %d slash%s!\n",
s, s>1?"es":"");
if(data->change.url_alloc)
free(data->change.url);
/* repair the URL to use two slashes */
data->change.url = aprintf("%s://%s%s",
protobuf, conn->host.name, path);
if(!data->change.url)
return CURLE_OUT_OF_MEMORY;
data->change.url_alloc = TRUE;
}
}
}
/* We search for '?' in the host name (but only on the right side of a

View File

@ -119,7 +119,7 @@ test1104 test1105 test1106 test1107 test1108 test1109 test1110 test1111 \
test1112 test1113 test1114 test1115 test1116 test1117 test1118 test1119 \
test1120 test1121 test1122 test1123 test1124 test1125 test1126 test1127 \
test1128 test1129 test1130 test1131 test1132 test1133 test1134 test1135 \
test1136 test1137 test1138 test1139 test1140 \
test1136 test1137 test1138 test1139 test1140 test1141 test1142 test1143 \
\
test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 \
test1208 test1209 test1210 test1211 test1212 test1213 test1214 test1215 \

67
tests/data/test1141 Normal file
View File

@ -0,0 +1,67 @@
<testcase>
<info>
<keywords>
HTTP
HTTP GET
followlocation
</keywords>
</info>
# Server-side
<reply>
<data>
HTTP/1.1 302 This is a weirdo text message
Connection: close
Location: http:///foo.example.com/want/11410001
This server reply is for testing
</data>
<data1>
HTTP/1.1 200 hello
Connection: close
Content-Length: 4
hej
</data1>
<datacheck>
HTTP/1.1 302 This is a weirdo text message
Connection: close
Location: http:///foo.example.com/want/11410001
HTTP/1.1 200 hello
Connection: close
Content-Length: 4
hej
</datacheck>
</reply>
# Client-side
<client>
<server>
http
</server>
<name>
HTTP redirect to http:/// (three slashes!)
</name>
<command>
%HOSTIP:%HTTPPORT/want/1141 -L -x http://%HOSTIP:%HTTPPORT
</command>
</client>
# Verify data after the test has been "shot"
<verify>
<strip>
^User-Agent:.*
</strip>
<protocol>
GET http://%HOSTIP:%HTTPPORT/want/1141 HTTP/1.1
Host: %HOSTIP:%HTTPPORT
Accept: */*
GET http://foo.example.com/want/11410001 HTTP/1.1
Host: foo.example.com
Accept: */*
</protocol>
</verify>
</testcase>

62
tests/data/test1142 Normal file
View File

@ -0,0 +1,62 @@
<testcase>
<info>
<keywords>
HTTP
HTTP GET
followlocation
</keywords>
</info>
# Server-side
<reply>
<data>
HTTP/1.1 302 This is a weirdo text message
Connection: close
Location: http:////foo.example.com/want/11420001
This server reply is for testing
</data>
<data1>
HTTP/1.1 200 hello
Connection: close
Content-Length: 4
hej
</data1>
<datacheck>
HTTP/1.1 302 This is a weirdo text message
Connection: close
Location: http:////foo.example.com/want/11420001
</datacheck>
</reply>
# Client-side
<client>
<server>
http
</server>
<name>
HTTP redirect to http://// (four slashes!)
</name>
<command>
%HOSTIP:%HTTPPORT/want/1142 -L -x http://%HOSTIP:%HTTPPORT
</command>
</client>
# Verify data after the test has been "shot"
<verify>
<strip>
^User-Agent:.*
</strip>
<protocol>
GET http://%HOSTIP:%HTTPPORT/want/1142 HTTP/1.1
Host: %HOSTIP:%HTTPPORT
Accept: */*
</protocol>
# 3, CURLE_URL_MALFORMAT for the four slashes
<errorcode>
3
</errorcode>
</verify>
</testcase>

45
tests/data/test1143 Normal file
View File

@ -0,0 +1,45 @@
<testcase>
<info>
<keywords>
HTTP
HTTP GET
followlocation
</keywords>
</info>
# Server-side
<reply>
<data>
HTTP/1.1 200 hello
Connection: close
Content-Length: 4
hej
</data>
</reply>
# Client-side
<client>
<server>
http
</server>
<name>
HTTP URL with http:/ (one slash!)
</name>
<command>
http:/%HOSTIP:%HTTPPORT/want/1143
</command>
</client>
# Verify data after the test has been "shot"
<verify>
<strip>
^User-Agent:.*
</strip>
<protocol>
GET /want/1143 HTTP/1.1
Host: %HOSTIP:%HTTPPORT
Accept: */*
</protocol>
</verify>
</testcase>