curl/lib/transfer.c

821 lines
28 KiB
C

/*****************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 2000, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* In order to be useful for every potential user, curl and libcurl are
* dual-licensed under the MPL and the MIT/X-derivate licenses.
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the MPL or the MIT/X-derivate
* licenses. You may pick one of these licenses.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
* $Id$
*****************************************************************************/
#include "setup.h"
/* -- WIN32 approved -- */
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include "strequal.h"
#if defined(WIN32) && !defined(__GNUC__) || defined(__MINGW32__)
#include <winsock.h>
#include <time.h>
#include <io.h>
#else
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#include <netinet/in.h>
#include <sys/time.h>
#include <sys/resource.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <netdb.h>
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#ifdef HAVE_NET_IF_H
#include <net/if.h>
#endif
#include <sys/ioctl.h>
#include <signal.h>
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#ifndef HAVE_SELECT
#error "We can't compile without select() support!"
#endif
#ifndef HAVE_SOCKET
#error "We can't compile without socket() support!"
#endif
#endif
#include "urldata.h"
#include <curl/curl.h>
#include <curl/types.h>
#include "netrc.h"
#include "getenv.h"
#include "hostip.h"
#include "transfer.h"
#include "sendf.h"
#include "speedcheck.h"
#include "getpass.h"
#include "progress.h"
#include "getdate.h"
#define _MPRINTF_REPLACE /* use our functions only */
#include <curl/mprintf.h>
/* The last #include file should be: */
#ifdef MALLOCDEBUG
#include "memdebug.h"
#endif
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
/* Parts of this function was written by the friendly Mark Butler
<butlerm@xmission.com>. */
CURLcode static
_Transfer(struct connectdata *c_conn)
{
size_t nread; /* number of bytes read */
int bytecount = 0; /* total number of bytes read */
int writebytecount = 0; /* number of bytes written */
long contentlength=0; /* size of incoming data */
struct timeval start = Curl_tvnow();
struct timeval now = start; /* current time */
bool header = TRUE; /* incoming data has HTTP header */
int headerline = 0; /* counts header lines to better track the
first one */
char *hbufp; /* points at *end* of header line */
int hbuflen = 0;
char *str; /* within buf */
char *str_start; /* within buf */
char *end_ptr; /* within buf */
char *p; /* within headerbuff */
bool content_range = FALSE; /* set TRUE if Content-Range: was found */
int offset = 0; /* possible resume offset read from the
Content-Range: header */
int code = 0; /* error code from the 'HTTP/1.? XXX' line */
/* for the low speed checks: */
CURLcode urg;
time_t timeofdoc=0;
long bodywrites=0;
int writetype;
/* the highest fd we use + 1 */
struct UrlData *data;
struct connectdata *conn = (struct connectdata *)c_conn;
char *buf;
int maxfd;
if(!conn || (conn->handle != STRUCT_CONNECT))
return CURLE_BAD_FUNCTION_ARGUMENT;
data = conn->data; /* there's the root struct */
buf = data->buffer;
maxfd = (conn->sockfd>conn->writesockfd?conn->sockfd:conn->writesockfd)+1;
hbufp = data->headerbuff;
myalarm (0); /* switch off the alarm-style timeout */
now = Curl_tvnow();
start = now;
#define KEEP_READ 1
#define KEEP_WRITE 2
Curl_pgrsTime(data, TIMER_PRETRANSFER);
Curl_speedinit(data);
if((conn->sockfd == -1) &&
(conn->writesockfd == -1)) {
/* nothing to read, nothing to write, we're already OK! */
return CURLE_OK;
}
if (!conn->getheader) {
header = FALSE;
if(conn->size > 0)
Curl_pgrsSetDownloadSize(data, conn->size);
}
/* we want header and/or body, if neither then don't do this! */
if(conn->getheader ||
!data->bits.no_body) {
fd_set readfd;
fd_set writefd;
fd_set rkeepfd;
fd_set wkeepfd;
struct timeval interval;
int keepon=0;
/* timeout every X second
- makes a better progressmeter (i.e even when no data is read, the
meter can be updated and reflect reality)
- allows removal of the alarm() crap
- variable timeout is easier
*/
FD_ZERO (&readfd); /* clear it */
if(conn->sockfd != -1) {
FD_SET (conn->sockfd, &readfd); /* read socket */
keepon |= KEEP_READ;
}
FD_ZERO (&writefd); /* clear it */
if(conn->writesockfd != -1) {
FD_SET (conn->writesockfd, &writefd); /* write socket */
keepon |= KEEP_WRITE;
}
/* get these in backup variables to be able to restore them on each lap in
the select() loop */
rkeepfd = readfd;
wkeepfd = writefd;
while (keepon) {
readfd = rkeepfd; /* set those every lap in the loop */
writefd = wkeepfd;
interval.tv_sec = 1;
interval.tv_usec = 0;
switch (select (maxfd, &readfd, &writefd, NULL, &interval)) {
case -1: /* select() error, stop reading */
#ifdef EINTR
/* The EINTR is not serious, and it seems you might get this more
ofen when using the lib in a multi-threaded environment! */
if(errno == EINTR)
;
else
#endif
keepon = 0; /* no more read or write */
continue;
case 0: /* timeout */
break;
default:
if((keepon & KEEP_READ) && FD_ISSET(conn->sockfd, &readfd)) {
/* read! */
urg = Curl_read(conn, conn->sockfd, buf, BUFSIZE -1, &nread);
/* NULL terminate, allowing string ops to be used */
if (0 < (signed int) nread)
buf[nread] = 0;
/* if we receive 0 or less here, the server closed the connection and
we bail out from this! */
else if (0 >= (signed int) nread) {
keepon &= ~KEEP_READ;
break;
}
str = buf; /* Default buffer to use when we write the
buffer, it may be changed in the flow below
before the actual storing is done. */
/* Since this is a two-state thing, we check if we are parsing
headers at the moment or not. */
if (header) {
/* we are in parse-the-header-mode */
/* header line within buffer loop */
do {
int hbufp_index;
str_start = str; /* str_start is start of line within buf */
end_ptr = strchr (str_start, '\n');
if (!end_ptr) {
/* no more complete header lines within buffer */
/* copy what is remaining into headerbuff */
int str_length = (int)strlen(str);
if (hbuflen + (int)str_length >= data->headersize) {
char *newbuff;
long newsize=MAX((hbuflen+str_length)*3/2,
data->headersize*2);
hbufp_index = hbufp - data->headerbuff;
newbuff = (char *)realloc(data->headerbuff, newsize);
if(!newbuff) {
failf (data, "Failed to alloc memory for big header!");
return CURLE_READ_ERROR;
}
data->headersize=newsize;
data->headerbuff = newbuff;
hbufp = data->headerbuff + hbufp_index;
}
strcpy (hbufp, str);
hbufp += strlen (str);
hbuflen += strlen (str);
break; /* read more and try again */
}
str = end_ptr + 1; /* move just past new line */
if (hbuflen + (str - str_start) >= data->headersize) {
char *newbuff;
long newsize=MAX((hbuflen+(str-str_start))*3/2,
data->headersize*2);
hbufp_index = hbufp - data->headerbuff;
newbuff = (char *)realloc(data->headerbuff, newsize);
if(!newbuff) {
failf (data, "Failed to alloc memory for big header!");
return CURLE_READ_ERROR;
}
data->headersize= newsize;
data->headerbuff = newbuff;
hbufp = data->headerbuff + hbufp_index;
}
/* copy to end of line */
strncpy (hbufp, str_start, str - str_start);
hbufp += str - str_start;
hbuflen += str - str_start;
*hbufp = 0;
p = data->headerbuff;
/* we now have a full line that p points to */
if (('\n' == *p) || ('\r' == *p)) {
/* Zero-length line means end of header! */
if (-1 != conn->size) /* if known */
conn->size += bytecount; /* we append the already read size */
if ('\r' == *p)
p++; /* pass the \r byte */
if ('\n' == *p)
p++; /* pass the \n byte */
Curl_pgrsSetDownloadSize(data, conn->size);
header = FALSE; /* no more header to parse! */
/* now, only output this if the header AND body are requested:
*/
writetype = CLIENTWRITE_HEADER;
if (data->bits.http_include_header)
writetype |= CLIENTWRITE_BODY;
urg = Curl_client_write(data, writetype, data->headerbuff,
p - data->headerbuff);
if(urg)
return urg;
data->header_size += p - data->headerbuff;
break; /* exit header line loop */
}
if (!headerline++) {
/* This is the first header, it MUST be the error code line
or else we consiser this to be the body right away! */
if (sscanf (p, " HTTP/1.%*c %3d", &code)) {
/* 404 -> URL not found! */
if (
( ((data->bits.http_follow_location) && (code >= 400))
||
(!data->bits.http_follow_location && (code >= 300)))
&& (data->bits.http_fail_on_error)) {
/* If we have been told to fail hard on HTTP-errors,
here is the check for that: */
/* serious error, go home! */
failf (data, "The requested file was not found");
return CURLE_HTTP_NOT_FOUND;
}
data->progress.httpcode = code;
}
else {
header = FALSE; /* this is not a header line */
break;
}
}
/* check for Content-Length: header lines to get size */
if (strnequal("Content-Length", p, 14) &&
sscanf (p+14, ": %ld", &contentlength))
conn->size = contentlength;
else if (strnequal("Content-Range", p, 13)) {
if (sscanf (p+13, ": bytes %d-", &offset) ||
sscanf (p+13, ": bytes: %d-", &offset)) {
/* This second format was added August 1st by Igor
Khristophorov since Sun's webserver JavaWebServer/1.1.1
obviously sends the header this way! :-( */
if (data->resume_from == offset) {
/* we asked for a resume and we got it */
content_range = TRUE;
}
}
}
else if(data->cookies &&
strnequal("Set-Cookie: ", p, 11)) {
Curl_cookie_add(data->cookies, TRUE, &p[12]);
}
else if(strnequal("Last-Modified:", p,
strlen("Last-Modified:")) &&
(data->timecondition || data->bits.get_filetime) ) {
time_t secs=time(NULL);
timeofdoc = curl_getdate(p+strlen("Last-Modified:"), &secs);
if(data->bits.get_filetime)
data->progress.filetime = timeofdoc;
}
else if ((code >= 300 && code < 400) &&
(data->bits.http_follow_location) &&
strnequal("Location: ", p, 10)) {
/* this is the URL that the server advices us to get instead */
char *ptr;
char *start=p;
char backup;
start += 10; /* pass "Location: " */
ptr = start; /* start scanning here */
/* scan through the string to find the end */
while(*ptr && !isspace((int)*ptr))
ptr++;
backup = *ptr; /* store the ending letter */
*ptr = '\0'; /* zero terminate */
data->newurl = strdup(start); /* clone string */
*ptr = backup; /* restore ending letter */
}
writetype = CLIENTWRITE_HEADER;
if (data->bits.http_include_header)
writetype |= CLIENTWRITE_BODY;
urg = Curl_client_write(data, writetype, p, hbuflen);
if(urg)
return urg;
data->header_size += hbuflen;
/* reset hbufp pointer && hbuflen */
hbufp = data->headerbuff;
hbuflen = 0;
}
while (*str); /* header line within buffer */
/* We might have reached the end of the header part here, but
there might be a non-header part left in the end of the read
buffer. */
if (!header) {
/* the next token and forward is not part of
the header! */
/* we subtract the remaining header size from the buffer */
nread -= (str - buf);
}
} /* end if header mode */
/* This is not an 'else if' since it may be a rest from the header
parsing, where the beginning of the buffer is headers and the end
is non-headers. */
if (str && !header && ((signed int)nread > 0)) {
if(0 == bodywrites) {
/* These checks are only made the first time we are about to
write a chunk of the body */
if(conn->protocol&PROT_HTTP) {
/* HTTP-only checks */
if (data->newurl) {
/* abort after the headers if "follow Location" is set */
infof (data, "Follow to new URL: %s\n", data->newurl);
return CURLE_OK;
}
else if (data->resume_from &&
!content_range &&
(data->httpreq==HTTPREQ_GET)) {
/* we wanted to resume a download, although the server
doesn't seem to support this and we did this with a GET
(if it wasn't a GET we did a POST or PUT resume) */
failf (data, "HTTP server doesn't seem to support "
"byte ranges. Cannot resume.");
return CURLE_HTTP_RANGE_ERROR;
}
else if(data->timecondition && !data->range) {
/* A time condition has been set AND no ranges have been
requested. This seems to be what chapter 13.3.4 of
RFC 2616 defines to be the correct action for a
HTTP/1.1 client */
if((timeofdoc > 0) && (data->timevalue > 0)) {
switch(data->timecondition) {
case TIMECOND_IFMODSINCE:
default:
if(timeofdoc < data->timevalue) {
infof(data,
"The requested document is not new enough\n");
return CURLE_OK;
}
break;
case TIMECOND_IFUNMODSINCE:
if(timeofdoc > data->timevalue) {
infof(data,
"The requested document is not old enough\n");
return CURLE_OK;
}
break;
} /* switch */
} /* two valid time strings */
} /* we have a time condition */
} /* this is HTTP */
} /* this is the first time we write a body part */
bodywrites++;
if(data->maxdownload &&
(bytecount + nread > data->maxdownload)) {
nread = data->maxdownload - bytecount;
if((signed int)nread < 0 ) /* this should be unusual */
nread = 0;
keepon &= ~KEEP_READ; /* we're done reading */
}
bytecount += nread;
Curl_pgrsSetDownloadCounter(data, (double)bytecount);
urg = Curl_client_write(data, CLIENTWRITE_BODY, str, nread);
if(urg)
return urg;
} /* if (! header and data to read ) */
} /* if( read from socket ) */
if((keepon & KEEP_WRITE) && FD_ISSET(conn->writesockfd, &writefd)) {
/* write */
char scratch[BUFSIZE * 2];
int i, si;
size_t bytes_written;
if(data->crlf)
buf = data->buffer; /* put it back on the buffer */
nread = data->fread(buf, 1, conn->upload_bufsize, data->in);
/* the signed int typecase of nread of for systems that has
unsigned size_t */
if ((signed int)nread<=0) {
/* done */
keepon &= ~KEEP_WRITE; /* we're done writing */
break;
}
writebytecount += nread;
Curl_pgrsSetUploadCounter(data, (double)writebytecount);
/* convert LF to CRLF if so asked */
if (data->crlf) {
for(i = 0, si = 0; i < (int)nread; i++, si++) {
if (buf[i] == 0x0a) {
scratch[si++] = 0x0d;
scratch[si] = 0x0a;
}
else {
scratch[si] = buf[i];
}
}
nread = si;
buf = scratch; /* point to the new buffer */
}
/* write to socket */
urg = Curl_write(conn, conn->writesockfd, buf, nread,
&bytes_written);
if(nread != bytes_written) {
failf(data, "Failed uploading data");
return CURLE_WRITE_ERROR;
}
}
break;
}
now = Curl_tvnow();
if(Curl_pgrsUpdate(data))
urg = CURLE_ABORTED_BY_CALLBACK;
else
urg = Curl_speedcheck (data, now);
if (urg)
return urg;
if(data->progress.ulspeed > conn->upload_bufsize) {
/* If we're transfering more data per second than fits in our buffer,
we increase the buffer size to adjust to the current
speed. However, we must not set it larger than BUFSIZE. We don't
adjust it downwards again since we don't see any point in that!
*/
conn->upload_bufsize=(long)min(data->progress.ulspeed, BUFSIZE);
}
if (data->timeout && (Curl_tvdiff (now, start) > data->timeout)) {
failf (data, "Operation timed out with %d out of %d bytes received",
bytecount, conn->size);
return CURLE_OPERATION_TIMEOUTED;
}
}
}
if(!(data->bits.no_body) && contentlength &&
(bytecount != contentlength)) {
failf(data, "transfer closed with %d bytes remaining to read",
contentlength-bytecount);
return CURLE_PARTIAL_FILE;
}
if(Curl_pgrsUpdate(data))
return CURLE_ABORTED_BY_CALLBACK;
if(conn->bytecountp)
*conn->bytecountp = bytecount; /* read count */
if(conn->writebytecountp)
*conn->writebytecountp = writebytecount; /* write count */
return CURLE_OK;
}
typedef int (*func_T)(void);
CURLcode curl_transfer(CURL *curl)
{
CURLcode res;
struct UrlData *data = curl;
struct connectdata *c_connect=NULL;
Curl_pgrsStartNow(data);
do {
Curl_pgrsTime(data, TIMER_STARTSINGLE);
res = curl_connect(curl, (CURLconnect **)&c_connect);
if(res == CURLE_OK) {
res = curl_do(c_connect);
if(res == CURLE_OK) {
res = _Transfer(c_connect); /* now fetch that URL please */
if(res == CURLE_OK)
res = curl_done(c_connect);
}
if((res == CURLE_OK) && data->newurl) {
/* Location: redirect
This is assumed to happen for HTTP(S) only!
*/
char prot[16]; /* URL protocol string storage */
char letter; /* used for a silly sscanf */
if (data->maxredirs && (data->followlocation >= data->maxredirs)) {
failf(data,"Maximum (%d) redirects followed", data->maxredirs);
curl_disconnect(c_connect);
res=CURLE_TOO_MANY_REDIRECTS;
break;
}
/* mark the next request as a followed location: */
data->bits.this_is_a_follow = TRUE;
data->followlocation++; /* count location-followers */
if(data->bits.http_auto_referer) {
/* We are asked to automatically set the previous URL as the
referer when we get the next URL. We pick the ->url field,
which may or may not be 100% correct */
if(data->free_referer) {
/* If we already have an allocated referer, free this first */
free(data->referer);
}
data->referer = strdup(data->url);
data->free_referer = TRUE; /* yes, free this later */
data->bits.http_set_referer = TRUE; /* might have been false */
}
if(2 != sscanf(data->newurl, "%15[^:]://%c", prot, &letter)) {
/***
*DANG* this is an RFC 2068 violation. The URL is supposed
to be absolute and this doesn't seem to be that!
***
Instead, we have to TRY to append this new path to the old URL
to the right of the host part. Oh crap, this is doomed to cause
problems in the future...
*/
char *protsep;
char *pathsep;
char *newest;
/* protsep points to the start of the host name */
protsep=strstr(data->url, "//");
if(!protsep)
protsep=data->url;
else {
/* TBD: set the port with curl_setopt() */
data->port=0; /* we got a full URL and then we should reset the
port number here to re-initiate it later */
protsep+=2; /* pass the slashes */
}
if('/' != data->newurl[0]) {
/* First we need to find out if there's a ?-letter in the URL,
and cut it and the right-side of that off */
pathsep = strrchr(protsep, '?');
if(pathsep)
*pathsep=0;
/* we have a relative path to append to the last slash if
there's one available */
pathsep = strrchr(protsep, '/');
if(pathsep)
*pathsep=0;
}
else {
/* We got a new absolute path for this server, cut off from the
first slash */
pathsep = strchr(protsep, '/');
if(pathsep)
*pathsep=0;
}
newest=(char *)malloc( strlen(data->url) +
1 + /* possible slash */
strlen(data->newurl) + 1/* zero byte */);
if(!newest)
return CURLE_OUT_OF_MEMORY;
sprintf(newest, "%s%s%s", data->url, ('/' == data->newurl[0])?"":"/",
data->newurl);
free(data->newurl);
data->newurl = newest;
}
else {
/* This was an absolute URL, clear the port number! */
/* TBD: set the port with curl_setopt() */
data->port = 0;
}
if(data->bits.urlstringalloc)
free(data->url);
/* TBD: set the URL with curl_setopt() */
data->url = data->newurl;
data->newurl = NULL; /* don't show! */
data->bits.urlstringalloc = TRUE; /* the URL is allocated */
infof(data, "Follows Location: to new URL: '%s'\n", data->url);
/*
* We get here when the HTTP code is 300-399. We need to perform
* differently based on exactly what return code there was.
* Discussed on the curl mailing list and posted about on the 26th
* of January 2001.
*/
switch(data->progress.httpcode) {
case 300: /* Multiple Choices */
case 301: /* Moved Permanently */
case 302: /* Found */
case 306: /* Not used */
case 307: /* Temporary Redirect */
default: /* for all unknown ones */
/* These are explicitly mention since I've checked RFC2616 and they
* seem to be OK to POST to.
*/
break;
case 303: /* See Other */
/* Disable both types of POSTs, since doing a second POST when
* following isn't what anyone would want! */
data->bits.http_post = FALSE;
data->bits.http_formpost = FALSE;
data->httpreq = HTTPREQ_GET; /* enfore GET request */
infof(data, "Disables POST\n");
break;
case 304: /* Not Modified */
/* 304 means we did a conditional request and it was "Not modified".
* We shouldn't get any Location: header in this response!
*/
break;
case 305: /* Use Proxy */
/* (quote from RFC2616, section 10.3.6):
* "The requested resource MUST be accessed through the proxy given
* by the Location field. The Location field gives the URI of the
* proxy. The recipient is expected to repeat this single request
* via the proxy. 305 responses MUST only be generated by origin
* servers."
*/
break;
}
curl_disconnect(c_connect);
continue;
}
curl_disconnect(c_connect);
}
break; /* it only reaches here when this shouldn't loop */
} while(1); /* loop if Location: */
if(data->newurl)
free(data->newurl);
return res;
}
CURLcode
Curl_Transfer(struct connectdata *c_conn, /* connection data */
int sockfd, /* socket to read from or -1 */
int size, /* -1 if unknown at this point */
bool getheader, /* TRUE if header parsing is wanted */
long *bytecountp, /* return number of bytes read or NULL */
int writesockfd, /* socket to write to, it may very well be
the same we read from. -1 disables */
long *writebytecountp /* return number of bytes written or
NULL */
)
{
struct connectdata *conn = (struct connectdata *)c_conn;
if(!conn)
return CURLE_BAD_FUNCTION_ARGUMENT;
/* now copy all input parameters */
conn->sockfd = sockfd;
conn->size = size;
conn->getheader = getheader;
conn->bytecountp = bytecountp;
conn->writesockfd = writesockfd;
conn->writebytecountp = writebytecountp;
return CURLE_OK;
}