- To make it easier for applications that want lots of magic stuff done on

redirections and thus cannot use CURLOPT_FOLLOWLOCATION easily, we now
  introduce the new CURLINFO_REDIRECT_URL option that lets applications
  extract the URL libcurl would've redirected to if it had been told to. This
  then enables the application to continue to that URL as it thinks is
  suitable, without having to re-implement the magic of creating the new URL
  from the Location: header etc. Test 1029 verifies it.
This commit is contained in:
Daniel Stenberg 2008-04-30 21:20:08 +00:00
parent 7dfdbf8fbe
commit 852989856d
14 changed files with 218 additions and 66 deletions

View File

@ -7,6 +7,15 @@
Changelog
Daniel Stenberg (29 Apr 2008)
- To make it easier for applications that want lots of magic stuff done on
redirections and thus cannot use CURLOPT_FOLLOWLOCATION easily, we now
introduce the new CURLINFO_REDIRECT_URL option that lets applications
extract the URL libcurl would've redirected to if it had been told to. This
then enables the application to continue to that URL as it thinks is
suitable, without having to re-implement the magic of creating the new URL
from the Location: header etc. Test 1029 verifies it.
Yang Tse (29 Apr 2008)
- Improved easy interface resolving timeout handling in c-ares enabled builds

View File

@ -13,6 +13,7 @@ This release includes the following changes:
o CURLFORM_STREAM was added
o CURLOPT_NOBODY is now supported over SFTP
o curl can now run on Symbian OS
o curl -w redirect_url and CURLINFO_REDIRECT_URL
This release includes the following bugfixes:

View File

@ -1288,7 +1288,9 @@ The URL that was fetched last. This is mostly meaningful if you've told curl
to follow location: headers.
.TP
.B http_code
The numerical code that was found in the last retrieved HTTP(S) page.
The numerical response code that was found in the last retrieved HTTP(S) or
FTP(s) transfer. In 7.18.2 the alias \fBresponse_code\fP was added to show the
same info.
.TP
.B http_connect
The numerical code that was found in the last response (from a proxy) to a
@ -1349,6 +1351,10 @@ Number of new connects made in the recent transfer. (Added in 7.12.3)
.B num_redirects
Number of redirects that were followed in the request. (Added in 7.12.3)
.TP
.B redirect_url
When an HTTP request was made without -L to follow redirects, this variable
will show the actual URL a redirect \fIwould\fP take you to. (Added in 7.18.2)
.TP
.B ftp_entry_path
The initial path libcurl ended up in when logging on to the remote FTP
server. (Added in 7.15.4)

View File

@ -5,7 +5,7 @@
.\" * | (__| |_| | _ <| |___
.\" * \___|\___/|_| \_\_____|
.\" *
.\" * Copyright (C) 1998 - 2007, Daniel Stenberg, <daniel@haxx.se>, et al.
.\" * Copyright (C) 1998 - 2008, Daniel Stenberg, <daniel@haxx.se>, et al.
.\" *
.\" * This software is licensed as described in the file COPYING, which
.\" * you should have received as part of this distribution. The terms
@ -89,6 +89,12 @@ complete execution time for multiple redirections. (Added in 7.9.7)
.IP CURLINFO_REDIRECT_COUNT
Pass a pointer to a long to receive the total number of redirections that were
actually followed. (Added in 7.9.7)
.IP CURLINFO_REDIRECT_URL
Pass a pointer to a char pointer to receive the URL a redirect \fIwould\fP
take you to if you enabled CURLOPT_FOLLOWLOCATION. This can come in very
handy if you think the built-in libcurl redirect logic isn't good enough
for you but you would still prefer to avoid implementing all the magic of
figuring out the new URL. (Added in 7.18.2)
.IP CURLINFO_SIZE_UPLOAD
Pass a pointer to a double to receive the total amount of bytes that were
uploaded.

View File

@ -1587,9 +1587,10 @@ typedef enum {
CURLINFO_COOKIELIST = CURLINFO_SLIST + 28,
CURLINFO_LASTSOCKET = CURLINFO_LONG + 29,
CURLINFO_FTP_ENTRY_PATH = CURLINFO_STRING + 30,
CURLINFO_REDIRECT_URL = CURLINFO_STRING + 31,
/* Fill in new entries below here! */
CURLINFO_LASTONE = 30
CURLINFO_LASTONE = 31
} CURLINFO;
/* CURLINFO_RESPONSE_CODE is the new name for the option previously known as

View File

@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2007, Daniel Stenberg, <daniel@haxx.se>, et al.
* Copyright (C) 1998 - 2008, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@ -231,6 +231,11 @@ CURLcode Curl_getinfo(struct SessionHandle *data, CURLINFO info, ...)
else
*param_longp = -1;
break;
case CURLINFO_REDIRECT_URL:
/* Return the URL this request would have been redirected to if that
option had been enabled! */
*param_charp = data->info.wouldredirect;
break;
default:
return CURLE_BAD_FUNCTION_ARGUMENT;
}

View File

@ -1262,6 +1262,7 @@ static CURLMcode multi_runsingle(struct Curl_multi *multi,
else if(TRUE == done) {
char *newurl;
bool retry = Curl_retry_request(easy->easy_conn, &newurl);
followtype follow=FOLLOW_NONE;
/* call this even if the readwrite function returned error */
Curl_posttransfer(easy->easy_handle);
@ -1278,10 +1279,13 @@ static CURLMcode multi_runsingle(struct Curl_multi *multi,
then figure out the URL here */
newurl = easy->easy_handle->req.newurl;
easy->easy_handle->req.newurl = NULL;
follow = FOLLOW_REDIR;
}
else
follow = FOLLOW_RETRY;
easy->result = Curl_done(&easy->easy_conn, CURLE_OK, FALSE);
if(easy->result == CURLE_OK)
easy->result = Curl_follow(easy->easy_handle, newurl, retry);
easy->result = Curl_follow(easy->easy_handle, newurl, follow);
if(CURLE_OK == easy->result) {
multistate(easy, CURLM_STATE_CONNECT);
result = CURLM_CALL_MULTI_PERFORM;

View File

@ -250,7 +250,7 @@ CURLcode Curl_readrewind(struct connectdata *conn)
err = (data->set.seek_func)(data->set.seek_client, 0, SEEK_SET);
if(err) {
failf(data, "seek callback returned error %d", (int)err);
failf(data, "seek callback returned error %d", (int)err);
return CURLE_SEND_FAIL_REWIND;
}
}
@ -1113,34 +1113,37 @@ CURLcode Curl_readwrite(struct connectdata *conn,
}
else if((k->httpcode >= 300 && k->httpcode < 400) &&
checkprefix("Location:", k->p)) {
if(data->set.http_follow_location) {
/* this is the URL that the server advices us to get instead */
char *ptr;
char *start=k->p;
char backup;
/* this is the URL that the server advices us to use instead */
char *ptr;
char *start=k->p;
char backup;
start += 9; /* pass "Location:" */
start += 9; /* pass "Location:" */
/* Skip spaces and tabs. We do this to support multiple
white spaces after the "Location:" keyword. */
while(*start && ISSPACE(*start ))
start++;
/* Skip spaces and tabs. We do this to support multiple
white spaces after the "Location:" keyword. */
while(*start && ISSPACE(*start ))
start++;
/* Scan through the string from the end to find the last
non-space. k->end_ptr points to the actual terminating zero
letter, move pointer one letter back and start from
there. This logic strips off trailing whitespace, but keeps
any embedded whitespace. */
ptr = k->end_ptr-1;
while((ptr>=start) && ISSPACE(*ptr))
ptr--;
ptr++;
/* Scan through the string from the end to find the last
non-space. k->end_ptr points to the actual terminating zero
letter, move pointer one letter back and start from
there. This logic strips off trailing whitespace, but keeps
any embedded whitespace. */
ptr = k->end_ptr-1;
while((ptr>=start) && ISSPACE(*ptr))
ptr--;
ptr++;
backup = *ptr; /* store the ending letter */
if(ptr != start) {
*ptr = '\0'; /* zero terminate */
data->req.newurl = strdup(start); /* clone string */
*ptr = backup; /* restore ending letter */
backup = *ptr; /* store the ending letter */
if(ptr != start) {
*ptr = '\0'; /* zero terminate */
data->req.location = strdup(start); /* clone string */
*ptr = backup; /* restore ending letter */
if(!data->req.location)
return CURLE_OUT_OF_MEMORY;
if(data->set.http_follow_location) {
data->req.newurl = strdup(data->req.location); /* clone */
if(!data->req.newurl)
return CURLE_OUT_OF_MEMORY;
}
@ -1969,16 +1972,16 @@ CURLcode Curl_follow(struct SessionHandle *data,
char *newurl, /* this 'newurl' is the Location: string,
and it must be malloc()ed before passed
here */
bool retry) /* set TRUE if this is a request retry as
opposed to a real redirect following */
followtype type) /* see transfer.h */
{
/* Location: redirect */
char prot[16]; /* URL protocol string storage */
char letter; /* used for a silly sscanf */
size_t newlen;
char *newest;
bool disallowport = FALSE;
if(!retry) {
if(type == FOLLOW_REDIR) {
if((data->set.maxredirs != -1) &&
(data->set.followlocation >= data->set.maxredirs)) {
failf(data,"Maximum (%d) redirects followed", data->set.maxredirs);
@ -1989,19 +1992,19 @@ CURLcode Curl_follow(struct SessionHandle *data,
data->state.this_is_a_follow = TRUE;
data->set.followlocation++; /* count location-followers */
}
if(data->set.http_auto_referer) {
/* We are asked to automatically set the previous URL as the
referer when we get the next URL. We pick the ->url field,
which may or may not be 100% correct */
if(data->set.http_auto_referer) {
/* We are asked to automatically set the previous URL as the referer
when we get the next URL. We pick the ->url field, which may or may
not be 100% correct */
if(data->change.referer_alloc)
/* If we already have an allocated referer, free this first */
free(data->change.referer);
if(data->change.referer_alloc)
/* If we already have an allocated referer, free this first */
free(data->change.referer);
data->change.referer = strdup(data->change.url);
data->change.referer_alloc = TRUE; /* yes, free this later */
data->change.referer = strdup(data->change.url);
data->change.referer_alloc = TRUE; /* yes, free this later */
}
}
if(2 != sscanf(newurl, "%15[^?&/:]://%c", prot, &letter)) {
@ -2141,7 +2144,7 @@ CURLcode Curl_follow(struct SessionHandle *data,
}
else {
/* This is an absolute URL, don't allow the custom port number */
data->state.allow_port = FALSE;
disallowport = TRUE;
if(strchr(newurl, ' ')) {
/* This new URL contains at least one space, this is a mighty stupid
@ -2159,6 +2162,16 @@ CURLcode Curl_follow(struct SessionHandle *data,
}
if(type == FOLLOW_FAKE) {
/* we're only figuring out the new url if we would've followed locations
but now we're done so we can get out! */
data->info.wouldredirect = newurl;
return CURLE_OK;
}
if(disallowport)
data->state.allow_port = FALSE;
if(data->change.url_alloc)
free(data->change.url);
else
@ -2289,7 +2302,9 @@ connect_host(struct SessionHandle *data,
return res;
}
/* Returns TRUE and sets '*url' if a request retry is wanted */
/* Returns TRUE and sets '*url' if a request retry is wanted.
NOTE: the string stored in '*url' is malloc()ed. */
bool Curl_retry_request(struct connectdata *conn,
char **url)
{
@ -2335,7 +2350,7 @@ CURLcode Curl_perform(struct SessionHandle *data)
CURLcode res2;
struct connectdata *conn=NULL;
char *newurl = NULL; /* possibly a new URL to follow to! */
bool retry = FALSE;
int follow = FOLLOW_NONE;
data->state.used_interface = Curl_if_easy;
@ -2366,14 +2381,29 @@ CURLcode Curl_perform(struct SessionHandle *data)
if(res == CURLE_OK) {
res = Transfer(conn); /* now fetch that URL please */
if(res == CURLE_OK) {
retry = Curl_retry_request(conn, &newurl);
bool retry = Curl_retry_request(conn, &newurl);
if(!retry)
if(retry)
follow = FOLLOW_RETRY;
else {
/*
* We must duplicate the new URL here as the connection data may
* be free()ed in the Curl_done() function.
* be free()ed in the Curl_done() function. We prefer the newurl
* one since that's used for redirects or just further requests
* for retries or multi-stage HTTP auth methods etc.
*/
newurl = data->req.newurl?strdup(data->req.newurl):NULL;
if(data->req.newurl) {
follow = FOLLOW_REDIR;
newurl = strdup(data->req.newurl);
}
else if(data->req.location) {
follow = FOLLOW_FAKE;
newurl = strdup(data->req.location);
}
}
/* in the above cases where 'newurl' gets assigned, we have a fresh
* allocated memory pointed to */
}
else {
/* The transfer phase returned error, we mark the connection to get
@ -2409,11 +2439,17 @@ CURLcode Curl_perform(struct SessionHandle *data)
* in 'Curl_done' or other functions.
*/
if((res == CURLE_OK) && newurl) {
res = Curl_follow(data, newurl, retry);
if((res == CURLE_OK) && follow) {
res = Curl_follow(data, newurl, follow);
if(CURLE_OK == res) {
/* if things went fine, Curl_follow() freed or otherwise took
responsibility for the newurl pointer */
newurl = NULL;
continue;
if(follow >= FOLLOW_RETRY) {
follow = FOLLOW_NONE;
continue;
}
/* else we break out of the loop below */
}
}
}

View File

@ -7,7 +7,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2007, Daniel Stenberg, <daniel@haxx.se>, et al.
* Copyright (C) 1998 - 2008, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@ -26,7 +26,20 @@ CURLcode Curl_perform(struct SessionHandle *data);
CURLcode Curl_pretransfer(struct SessionHandle *data);
CURLcode Curl_second_connect(struct connectdata *conn);
CURLcode Curl_posttransfer(struct SessionHandle *data);
CURLcode Curl_follow(struct SessionHandle *data, char *newurl, bool retry);
/*
 * followtype - tells Curl_follow() what kind of "follow" is being requested.
 *
 * The numeric order is significant: Curl_perform() checks
 * 'follow >= FOLLOW_RETRY' to decide whether to loop for another request,
 * so entries that cause a new request must sort after FOLLOW_FAKE.
 */
typedef enum {
  /* placeholder so variables can be initialised; Curl_follow() itself
     never acts on this value */
  FOLLOW_NONE = 0,

  /* only work out and record the URL that would have been used; no
     request is actually made to it */
  FOLLOW_FAKE = 1,

  /* the same request is being retried, as opposed to following a real
     redirect */
  FOLLOW_RETRY = 2,

  /* a genuine, full redirect */
  FOLLOW_REDIR = 3,

  /* end marker, never used as an actual type */
  FOLLOW_LAST = 4
} followtype;
CURLcode Curl_follow(struct SessionHandle *data, char *newurl, followtype type);
CURLcode Curl_readwrite(struct connectdata *conn, bool *done);
int Curl_single_getsock(const struct connectdata *conn,
curl_socket_t *socks,

View File

@ -494,6 +494,7 @@ CURLcode Curl_close(struct SessionHandle *data)
Curl_digest_cleanup(data);
Curl_safefree(data->info.contenttype);
Curl_safefree(data->info.wouldredirect);
/* this destroys the channel and we cannot use it anymore after this */
ares_destroy(data->state.areschannel);
@ -4440,6 +4441,10 @@ CURLcode Curl_done(struct connectdata **connp,
free(data->req.newurl);
data->req.newurl = NULL;
}
if(data->req.location) {
free(data->req.location);
data->req.location = NULL;
}
if(conn->dns_entry) {
Curl_resolv_unlock(data, conn->dns_entry); /* done with this */

View File

@ -760,8 +760,10 @@ struct SingleRequest {
bool ignorecl; /* This HTTP response has no body so we ignore the Content-
Length: header */
char *newurl; /* This can only be set if a Location: was in the
document headers */
char *location; /* This points to an allocated version of the Location:
header data */
char *newurl; /* Set to the new URL to use when a redirect or a retry is
wanted */
/* 'upload_present' is used to keep a byte counter of how much data there is
still left in the buffer, aimed for upload. */
@ -1021,21 +1023,19 @@ struct connectdata {
*/
struct PureInfo {
int httpcode; /* Recent HTTP or FTP response code */
int httpproxycode;
int httpversion;
int httpproxycode; /* response code from proxy when received separate */
int httpversion; /* the http version number X.Y = X*10+Y */
long filetime; /* If requested, this is might get set. Set to -1 if the time
was unretrievable. We cannot have this of type time_t,
since time_t is unsigned on several platforms such as
OpenVMS. */
long header_size; /* size of read header(s) in bytes */
long request_size; /* the amount of bytes sent in the request(s) */
long proxyauthavail;
long httpauthavail;
long proxyauthavail; /* what proxy auth types were announced */
long httpauthavail; /* what host auth types were announced */
long numconnects; /* how many new connection did libcurl created */
char *contenttype; /* the content type of the object */
char *wouldredirect; /* URL this would've been redirected to if asked to */
};

View File

@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2006, Daniel Stenberg, <daniel@haxx.se>, et al.
* Copyright (C) 1998 - 2008, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@ -61,6 +61,7 @@ typedef enum {
VAR_REDIRECT_TIME,
VAR_REDIRECT_COUNT,
VAR_FTP_ENTRY_PATH,
VAR_REDIRECT_URL,
VAR_NUM_OF_VARS /* must be the last */
} replaceid;
@ -73,6 +74,7 @@ struct variable {
static const struct variable replacements[]={
{"url_effective", VAR_EFFECTIVE_URL},
{"http_code", VAR_HTTP_CODE},
{"response_code", VAR_HTTP_CODE},
{"http_connect", VAR_HTTP_CODE_PROXY},
{"time_total", VAR_TOTAL_TIME},
{"time_namelookup", VAR_NAMELOOKUP_TIME},
@ -90,6 +92,7 @@ static const struct variable replacements[]={
{"time_redirect", VAR_REDIRECT_TIME},
{"num_redirects", VAR_REDIRECT_COUNT},
{"ftp_entry_path", VAR_FTP_ENTRY_PATH},
{"redirect_url", VAR_REDIRECT_URL},
{NULL, VAR_NONE}
};
@ -222,6 +225,12 @@ void ourWriteOut(CURL *curl, const char *writeinfo)
&& stringp)
fputs(stringp, stream);
break;
case VAR_REDIRECT_URL:
if((CURLE_OK ==
curl_easy_getinfo(curl, CURLINFO_REDIRECT_URL, &stringp))
&& stringp)
fputs(stringp, stream);
break;
default:
break;
}

View File

@ -50,11 +50,12 @@ EXTRA_DIST = test1 test108 test117 test127 test20 test27 test34 test46 \
test551 test552 test1016 test1017 test1018 test1019 test1020 test553 \
test1021 test1022 test1023 test309 test616 test617 test618 test619 \
test620 test621 test622 test623 test624 test625 test626 test627 test554 \
test1024 test1025 test555 test1026 test1027 test1028
test1024 test1025 test555 test1026 test1027 test1028 test1029
filecheck:
@mkdir test-place; \
cp "$(top_srcdir)"/tests/data/test[0-9]* test-place/; \
rm test-place/*~; \
for f in $(EXTRA_DIST); do \
if test -f "$(top_srcdir)/tests/data/$$f"; then \
rm -f test-place/$$f; \

56
tests/data/test1029 Normal file
View File

@ -0,0 +1,56 @@
<testcase>
<info>
<keywords>
HTTP
HTTP GET
redirect_url
followlocation
</keywords>
</info>
# Server-side
<reply>
<data nocheck="1">
HTTP/1.1 301 This is a weirdo text message swsclose
Location: data/10290002.txt?coolsite=yes
Content-Length: 62
Connection: close
This server reply is for testing a simple Location: following
</data>
</reply>
# Client-side
<client>
<server>
http
</server>
<name>
HTTP Location: and 'redirect_url' check
</name>
<command>
http://%HOSTIP:%HTTPPORT/we/want/our/1029 -w '%{redirect_url}\n'
</command>
</client>
# Verify data after the test has been "shot"
<verify>
<strip>
^User-Agent:.*
</strip>
<protocol>
GET /we/want/our/1029 HTTP/1.1
Host: %HOSTIP:%HTTPPORT
Accept: */*
</protocol>
<stdout mode="text">
HTTP/1.1 301 This is a weirdo text message swsclose
Location: data/10290002.txt?coolsite=yes
Content-Length: 62
Connection: close
This server reply is for testing a simple Location: following
http://127.0.0.1:8990/we/want/our/data/10290002.txt?coolsite=yes
</stdout>
</verify>
</testcase>