Pre-pad WAL files when streaming transaction log

Instead of filling files as they appear, pre-pad the
WAL files received when streaming xlog the same way
that the server does. Data is streamed into a .partial
file which is then renamed()d into palce when it's complete,
but it will always be 16MB.

This also means that the starting position for pg_receivexlog
is now simply right after the last complete segment, and we
never need to deal with partial segments there.

Patch by me, review by Fujii Masao
This commit is contained in:
Magnus Hagander 2011-11-03 15:37:08 +01:00
parent 4429f6a9e3
commit e7cc8437bb
2 changed files with 125 additions and 79 deletions

View File

@ -71,33 +71,10 @@ usage(void)
static bool
segment_callback(XLogRecPtr segendpos, uint32 timeline)
{
char fn[MAXPGPATH];
struct stat statbuf;
if (verbose)
fprintf(stderr, _("%s: finished segment at %X/%X (timeline %u)\n"),
progname, segendpos.xlogid, segendpos.xrecoff, timeline);
/*
* Check if there is a partial file for the name we just finished, and if
* there is, remove it under the assumption that we have now got all the
* data we need.
*/
segendpos.xrecoff /= XLOG_SEG_SIZE;
PrevLogSeg(segendpos.xlogid, segendpos.xrecoff);
snprintf(fn, sizeof(fn), "%s/%08X%08X%08X.partial",
basedir, timeline,
segendpos.xlogid,
segendpos.xrecoff);
if (stat(fn, &statbuf) == 0)
{
/* File existed, get rid of it */
if (verbose)
fprintf(stderr, _("%s: removing file \"%s\"\n"),
progname, fn);
unlink(fn);
}
/*
* Never abort from this - we handle all aborting in continue_streaming()
*/
@ -119,9 +96,8 @@ continue_streaming(void)
/*
* Determine starting location for streaming, based on:
* 1. If there are existing xlog segments, start at the end of the last one
* 2. If the last one is a partial segment, rename it and start over, since
* we don't sync after every write.
* 3. If no existing xlog exists, start from the beginning of the current
* that is complete (size matches XLogSegSize)
* 2. If no valid xlog exists, start from the beginning of the current
* WAL segment.
*/
static XLogRecPtr
@ -133,7 +109,6 @@ FindStreamingStart(XLogRecPtr currentpos, uint32 currenttimeline)
bool b;
uint32 high_log = 0;
uint32 high_seg = 0;
bool partial = false;
dir = opendir(basedir);
if (dir == NULL)
@ -195,7 +170,7 @@ FindStreamingStart(XLogRecPtr currentpos, uint32 currenttimeline)
disconnect_and_exit(1);
}
if (statbuf.st_size == 16 * 1024 * 1024)
if (statbuf.st_size == XLOG_SEG_SIZE)
{
/* Completed segment */
if (log > high_log ||
@ -208,37 +183,9 @@ FindStreamingStart(XLogRecPtr currentpos, uint32 currenttimeline)
}
else
{
/*
* This is a partial file. Rename it out of the way.
*/
char newfn[MAXPGPATH];
fprintf(stderr, _("%s: renaming partial file \"%s\" to \"%s.partial\"\n"),
progname, dirent->d_name, dirent->d_name);
snprintf(newfn, sizeof(newfn), "%s/%s.partial",
basedir, dirent->d_name);
if (stat(newfn, &statbuf) == 0)
{
/*
* XXX: perhaps we should only error out if the existing file
* is larger?
*/
fprintf(stderr, _("%s: file \"%s\" already exists. Check and clean up manually.\n"),
progname, newfn);
disconnect_and_exit(1);
}
if (rename(fullpath, newfn) != 0)
{
fprintf(stderr, _("%s: could not rename \"%s\" to \"%s\": %s\n"),
progname, fullpath, newfn, strerror(errno));
disconnect_and_exit(1);
}
/* Don't continue looking for more, we assume this is the last */
partial = true;
break;
fprintf(stderr, _("%s: segment file '%s' is incorrect size %d, skipping\n"),
progname, dirent->d_name, (int) statbuf.st_size);
continue;
}
}
@ -247,17 +194,11 @@ FindStreamingStart(XLogRecPtr currentpos, uint32 currenttimeline)
if (high_log > 0 || high_seg > 0)
{
XLogRecPtr high_ptr;
if (!partial)
{
/*
* If the segment was partial, the pointer is already at the right
* location since we want to re-transmit that segment. If it was
* not, we need to move it to the next segment, since we are
* tracking the last one that was complete.
*/
NextLogSeg(high_log, high_seg);
}
/*
* Move the starting pointer to the start of the next segment,
* since the highest one we've seen was completed.
*/
NextLogSeg(high_log, high_seg);
high_ptr.xlogid = high_log;
high_ptr.xrecoff = high_seg * XLOG_SEG_SIZE;

View File

@ -27,6 +27,7 @@
#include "receivelog.h"
#include "streamutil.h"
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
@ -41,24 +42,128 @@ const XLogRecPtr InvalidXLogRecPtr = {0, 0};
* Open a new WAL file in the specified directory. Store the name
* (not including the full directory) in namebuf. Assumes there is
* enough room in this buffer...
*
* The file will be padded to 16Mb with zeroes.
*/
static int
open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir, char *namebuf)
{
int f;
char fn[MAXPGPATH];
struct stat statbuf;
char *zerobuf;
int bytes;
XLogFileName(namebuf, timeline, startpoint.xlogid,
startpoint.xrecoff / XLOG_SEG_SIZE);
snprintf(fn, sizeof(fn), "%s/%s", basedir, namebuf);
f = open(fn, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, 0666);
snprintf(fn, sizeof(fn), "%s/%s.partial", basedir, namebuf);
f = open(fn, O_WRONLY | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR);
if (f == -1)
{
fprintf(stderr, _("%s: Could not open WAL segment %s: %s\n"),
progname, namebuf, strerror(errno));
progname, fn, strerror(errno));
return -1;
}
/*
* Verify that the file is either empty (just created), or a complete
* XLogSegSize segment. Anything in between indicates a corrupt file.
*/
if (fstat(f, &statbuf) != 0)
{
fprintf(stderr, _("%s: could not stat WAL segment %s: %s\n"),
progname, fn, strerror(errno));
close(f);
return -1;
}
if (statbuf.st_size == XLogSegSize)
return f; /* File is open and ready to use */
if (statbuf.st_size != 0)
{
fprintf(stderr, _("%s: WAL segment %s is %d bytes, should be 0 or %d\n"),
progname, fn, (int) statbuf.st_size, XLogSegSize);
close(f);
return -1;
}
/* New, empty, file. So pad it to 16Mb with zeroes */
zerobuf = xmalloc0(XLOG_BLCKSZ);
for (bytes = 0; bytes < XLogSegSize; bytes += XLOG_BLCKSZ)
{
if (write(f, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
fprintf(stderr, _("%s: could not pad WAL segment %s: %s\n"),
progname, fn, strerror(errno));
close(f);
unlink(fn);
return -1;
}
}
free(zerobuf);
if (lseek(f, SEEK_SET, 0) != 0)
{
fprintf(stderr, _("%s: could not seek back to beginning of WAL segment %s: %s\n"),
progname, fn, strerror(errno));
close(f);
return -1;
}
return f;
}
static bool
close_walfile(int walfile, char *basedir, char *walname)
{
off_t currpos = lseek(walfile, 0, SEEK_CUR);
if (currpos == -1)
{
fprintf(stderr, _("%s: could not get current position in file %s: %s\n"),
progname, walname, strerror(errno));
return false;
}
if (fsync(walfile) != 0)
{
fprintf(stderr, _("%s: could not fsync file %s: %s\n"),
progname, walname, strerror(errno));
return false;
}
if (close(walfile) != 0)
{
fprintf(stderr, _("%s: could not close file %s: %s\n"),
progname, walname, strerror(errno));
return false;
}
/*
* Rename the .partial file only if we've completed writing the
* whole segment.
*/
if (currpos == XLOG_SEG_SIZE)
{
char oldfn[MAXPGPATH];
char newfn[MAXPGPATH];
snprintf(oldfn, sizeof(oldfn), "%s/%s.partial", basedir, walname);
snprintf(newfn, sizeof(newfn), "%s/%s", basedir, walname);
if (rename(oldfn, newfn) != 0)
{
fprintf(stderr, _("%s: could not rename file %s: %s\n"),
progname, walname, strerror(errno));
return false;
}
}
else
fprintf(stderr, _("%s: not renaming %s, segment is not complete.\n"),
progname, walname);
return true;
}
/*
* Local version of GetCurrentTimestamp(), since we are not linked with
* backend code.
@ -178,10 +283,8 @@ ReceiveXlogStream(PGconn *conn, XLogRecPtr startpos, uint32 timeline, char *sysi
if (stream_continue && stream_continue())
{
if (walfile != -1)
{
fsync(walfile);
close(walfile);
}
/* Potential error message is written by close_walfile */
return close_walfile(walfile, basedir, current_walfile_name);
return true;
}
@ -360,8 +463,10 @@ ReceiveXlogStream(PGconn *conn, XLogRecPtr startpos, uint32 timeline, char *sysi
/* Did we reach the end of a WAL segment? */
if (blockpos.xrecoff % XLOG_SEG_SIZE == 0)
{
fsync(walfile);
close(walfile);
if (!close_walfile(walfile, basedir, current_walfile_name))
/* Error message written in close_walfile() */
return false;
walfile = -1;
xlogoff = 0;