diff --git a/contrib/pg_standby/Makefile b/contrib/pg_standby/Makefile new file mode 100644 index 0000000000..99abebc3a1 --- /dev/null +++ b/contrib/pg_standby/Makefile @@ -0,0 +1,18 @@ + +PROGRAM = pg_standby +OBJS = pg_standby.o + +PG_CPPFLAGS = -I$(libpq_srcdir) +PG_LIBS = $(libpq_pgport) + +DOCS = README.pg_standby + +ifdef USE_PGXS +PGXS := $(shell pg_config --pgxs) +include $(PGXS) +else +subdir = contrib/pg_standby +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/pg_standby/README.pg_standby b/contrib/pg_standby/README.pg_standby new file mode 100644 index 0000000000..9c4ef8ecb6 --- /dev/null +++ b/contrib/pg_standby/README.pg_standby @@ -0,0 +1,164 @@ +pg_standby README 2006/12/08 Simon Riggs + +o What is pg_standby? + + pg_standby is a production-ready program that can be used to + create a Warm Standby server. Other configuration is required + as well, all of which is described in the main server manual. + + The program is designed to be a wait-for restore_command, + required to turn a normal archive recovery into a Warm Standby. + Within the restore_command of the recovery.conf you could + configure pg_standby in the following way: + + restore_command = 'pg_standby archiveDir %f %p' + + which would be sufficient to define that files will be restored + from archiveDir. + +o features of pg_standby + + - pg_standby is written in C. So it is very portable + and easy to install. + + - supports copy or link from a directory (only) + + - source easy to modify, with specifically designated + sections to modify for your own needs, allowing + interfaces to be written for additional Backup Archive Restore + (BAR) systems + + - portable: tested on Linux and Windows + +o How to install pg_standby + + $make + $make install + +o How to use pg_standby? + + pg_standby should be used within the restore_command of the + recovery.conf file. 
See the main PostgreSQL manual for details. + + The basic usage should be like this: + + restore_command = 'pg_standby archiveDir %f %p' + + with the pg_standby command usage as + + pg_standby [OPTION]... [ARCHIVELOCATION] [NEXTWALFILE] [XLOGFILEPATH] + + When used within the restore_command the %f and %p macros + will provide the actual file and path required for the restore/recovery. + +o options + + pg_standby has a number of options. + + -c + use copy/cp command to restore WAL files from archive + + -d + debug/logging option. + + -k numfiles + Cleanup files in the archive so that we maintain no more + than this many files in the archive. + + You should be wary of setting this number too low, + since this may mean you cannot restart the standby. This + is because the last restartpoint marked in the WAL files + may be many files in the past and can vary considerably. + This should be set to a value exceeding the number of WAL + files that can be recovered in 2*checkpoint_timeout seconds, + according to the value in the warm standby postgresql.conf. + It is wholly unrelated to the setting of checkpoint_segments + on either primary or standby. + + If in doubt, use a large value or do not set a value at all. + + -l + use ln command to restore WAL files from archive + WAL files will remain in archive + + Link is more efficient, but the default is copy to + allow you to maintain the WAL archive for recovery + purposes as well as high-availability. + + This option uses the Windows Vista command mklink + to provide a file-to-file symbolic link. -l will + not work on versions of Windows prior to Vista. + Use the -c option instead. + see http://en.wikipedia.org/wiki/NTFS_symbolic_link + + -r maxretries + the maximum number of times to retry the restore command if it + fails. 
After each failure, we wait for sleeptime * num_retries + so that the wait time increases progressively, so by default + we will wait 5 secs, 10 secs then 15 secs before reporting + the failure back to the database server. This will be + interpreted as an end of recovery and the Standby will come + up fully as a result. + Default=3 + + -s sleeptime + the number of seconds to sleep between testing to see + if the file to be restored is available in the archive yet. + The default setting is not necessarily recommended, + consult the main database server manual for discussion. + Default=5 + + -t triggerfile + the presence of the triggerfile will cause recovery to end + whether or not the next file is available + It is recommended that you use a structured filename to + avoid confusion as to which server is being triggered + when multiple servers exist on the same system. + e.g. /tmp/pgsql.trigger.5432 + + -w maxwaittime + the maximum number of seconds to wait for the next file, + after which recovery will end and the Standby will come up. + The default setting is not necessarily recommended, + consult the main database server manual for discussion. 
+ Default=0 + + Note: --help is not supported since pg_standby is not intended + for interactive use, except during dev/test + +o examples + + Linux + + archive_command = 'cp %p ../archive/%f' + + restore_command = 'pg_standby -l -d -k 255 -r 2 -s 2 -w 0 -t /tmp/pgsql.trigger.5442 $PWD/../archive %f %p 2>> standby.log' + + which will + - use a ln command to restore WAL files from archive + - produce logfile output in standby.log + - keep the last 255 full WAL files, plus the current one + - sleep for 2 seconds between checks for whether the next WAL file is full + - never timeout if file not found + - stop waiting when a trigger file called /tmp/pgsql.trigger.5442 appears + + Windows + + archive_command = 'copy %p ..\\archive\\%f' + Note that backslashes need to be doubled in the archive_command, but + *not* in the restore_command, in 8.2, 8.1, 8.0 on Windows. + + restore_command = 'pg_standby -c -d -s 5 -w 0 -t C:\pgsql.trigger.5442 +..\archive %f %p 2>> standby.log' + + which will + - use a copy command to restore WAL files from archive + - produce logfile output in standby.log + - sleep for 5 seconds between checks for whether the next WAL file is full + - never timeout if file not found + - stop waiting when a trigger file called C:\pgsql.trigger.5442 appears + +o reported test success + + SUSE Linux 10.2 + Windows XP Pro diff --git a/contrib/pg_standby/pg_standby.c b/contrib/pg_standby/pg_standby.c new file mode 100644 index 0000000000..ef732aa075 --- /dev/null +++ b/contrib/pg_standby/pg_standby.c @@ -0,0 +1,619 @@ +/* + * pg_standby.c + * + * Production-ready example of how to create a Warm Standby + * database server using continuous archiving as a + * replication mechanism + * + * We separate the parameters for archive and nextWALfile + * so that we can check the archive exists, even if the + * WAL file doesn't (yet). + * + * This program will be executed once in full for each file + * requested by the warm standby server. 
+ * + * It is designed to cater to a variety of needs, as well as + * providing a customizable section. + * + * Original author: Simon Riggs simon@2ndquadrant.com + * Current maintainer: Simon Riggs + */ +#include "postgres_fe.h" +#include "pg_config_manual.h" + +#include +#include +#include +#include + +#ifdef WIN32 +#include "win32.h" +int getopt(int argc, char * const argv[], const char *optstring); +#else +#include +#include + +#ifdef HAVE_GETOPT_H +#include +#endif + +#endif /* ! WIN32 */ + +extern char *optarg; +extern int optind; + +/* Options and defaults */ +int sleeptime = 5; /* amount of time to sleep between file checks */ +int waittime = -1; /* how long we have been waiting, -1 no wait yet */ +int maxwaittime = 0; /* how long are we prepared to wait for? */ +int keepfiles = 0; /* number of WAL files to keep, 0 keep all */ +int maxretries = 3; /* number of retries on restore command */ +bool debug = false; /* are we debugging? */ +bool triggered = false; +bool signaled = false; + +char *archiveLocation; /* where to find the archive? */ +char *triggerPath; /* where to find the trigger file? 
*/ +char *xlogFilePath; /* where we are going to restore to */ +char *nextWALFileName; /* the file we need to get from archive */ +char *priorWALFileName; /* the file we need to get from archive */ +char WALFilePath[MAXPGPATH];/* the file path including archive */ +char restoreCommand[MAXPGPATH]; /* run this to restore */ +char inclusiveCleanupFileName[MAXPGPATH]; /* the file we need to get from archive */ + +#define RESTORE_COMMAND_COPY 0 +#define RESTORE_COMMAND_LINK 1 +int restoreCommandType; + +#define XLOG_DATA 0 +#define XLOG_HISTORY 1 +#define XLOG_BACKUP_LABEL 2 +int nextWALFileType; + +#define SET_RESTORE_COMMAND(cmd, arg1, arg2) \ + snprintf(restoreCommand, MAXPGPATH, cmd " %s %s", arg1, arg2) + +struct stat stat_buf; + +/* ===================================================================== + * + * Customizable section + * + * ===================================================================== + * + * Currently, this section assumes that the Archive is a locally + * accessible directory. If you want to make other assumptions, + * such as using a vendor-specific archive and access API, these + * routines are the ones you'll need to change. You're + * encouraged to submit any changes to pgsql-patches@postgresql.org + * or personally to the current maintainer. Those changes may be + * folded in to later versions of this program. + */ + +#define XLOG_DATA_FNAME_LEN 24 +/* Reworked from access/xlog_internal.h */ +#define XLogFileName(fname, tli, log, seg) \ + snprintf(fname, XLOG_DATA_FNAME_LEN + 1, "%08X%08X%08X", tli, log, seg) + +/* + * Initialize allows customized commands into the warm standby program. + * + * As an example, and probably the common case, we use either + * cp/ln commands on *nix, or copy/move command on Windows. 
+ * + */ +static void +CustomizableInitialize(void) +{ +#ifdef WIN32 + snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName); + switch (restoreCommandType) + { + case RESTORE_COMMAND_LINK: + SET_RESTORE_COMMAND("mklink",WALFilePath, xlogFilePath); + case RESTORE_COMMAND_COPY: + default: + SET_RESTORE_COMMAND("copy",WALFilePath, xlogFilePath); + break; + } +#else + snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName); + switch (restoreCommandType) + { + case RESTORE_COMMAND_LINK: +#if HAVE_WORKING_LINK + SET_RESTORE_COMMAND("ln -s -f",WALFilePath, xlogFilePath); + break; +#endif + case RESTORE_COMMAND_COPY: + default: + SET_RESTORE_COMMAND("cp",WALFilePath, xlogFilePath); + break; + } +#endif + + /* + * This code assumes that archiveLocation is a directory + * You may wish to add code to check for tape libraries, etc.. + * So, since it is a directory, we use stat to test if its accessible + */ + if (stat(archiveLocation, &stat_buf) != 0) + { + fprintf(stderr, "pg_standby: archiveLocation \"%s\" does not exist\n", archiveLocation); + fflush(stderr); + exit(2); + } +} + +/* + * CustomizableNextWALFileReady() + * + * Is the requested file ready yet? + */ +static bool +CustomizableNextWALFileReady() +{ + if (stat(WALFilePath, &stat_buf) == 0) + { + /* + * If its a backup file, return immediately + * If its a regular file return only if its the right size already + */ + if (strlen(nextWALFileName) > 24 && + strspn(nextWALFileName, "0123456789ABCDEF") == 24 && + strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".backup"), + ".backup") == 0) + { + nextWALFileType = XLOG_BACKUP_LABEL; + return true; + } + else + if (stat_buf.st_size == XLOG_SEG_SIZE) + { +#ifdef WIN32 + /* + * Windows reports that the file has the right number of bytes + * even though the file is still being copied and cannot be + * opened by pg_standby yet. So we wait for sleeptime secs + * before attempting to restore. 
If that is not enough, we + * will rely on the retry/holdoff mechanism. + */ + pg_usleep(sleeptime * 1000000L); +#endif + nextWALFileType = XLOG_DATA; + return true; + } + + /* + * If still too small, wait until it is the correct size + */ + if (stat_buf.st_size > XLOG_SEG_SIZE) + { + if (debug) + { + fprintf(stderr, "file size greater than expected\n"); + fflush(stderr); + } + exit(3); + } + } + + return false; +} + +#define MaxSegmentsPerLogFile ( 0xFFFFFFFF / XLOG_SEG_SIZE ) + +static void +CustomizableCleanupPriorWALFiles(void) +{ + uint32 tli, + log, + seg; + int signed_log = 0; + + if (keepfiles > 0) + { + sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg); + signed_log = log - (keepfiles / MaxSegmentsPerLogFile); + if (keepfiles <= seg) + seg -= keepfiles; + else + { + seg = MaxSegmentsPerLogFile - (keepfiles % MaxSegmentsPerLogFile); + signed_log--; + } + log = (uint32) signed_log; + } + + /* + * Work out name of prior file from current filename + */ + if (keepfiles > 0 && signed_log >= 0 && nextWALFileType == XLOG_DATA) + { + int rc; + DIR *xldir; + struct dirent *xlde; + + XLogFileName(inclusiveCleanupFileName, tli, log, seg); + + /* + * Assume its OK to keep failing. The failure situation may change over + * time, so we'd rather keep going on the main processing than fail + * because we couldnt clean up yet. + */ + if ((xldir = opendir(archiveLocation)) != NULL) + { + while ((xlde = readdir(xldir)) != NULL) + { + /* + * We ignore the timeline part of the XLOG segment identifiers in + * deciding whether a segment is still needed. This ensures that we + * won't prematurely remove a segment from a parent timeline. We could + * probably be a little more proactive about removing segments of + * non-parent timelines, but that would be a whole lot more + * complicated. + * + * We use the alphanumeric sorting property of the filenames to decide + * which ones are earlier than the inclusiveCleanupFileName file. 
+ */ + if (strlen(xlde->d_name) == XLOG_DATA_FNAME_LEN && + strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_DATA_FNAME_LEN && + strcmp(xlde->d_name + 8, inclusiveCleanupFileName + 8) <= 0) + { +#ifdef WIN32 + snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, xlde->d_name); +#else + snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, xlde->d_name); +#endif + rc = unlink(WALFilePath); + + if (debug) + fprintf(stderr, "\npg_standby: removed \"%s\"\n", WALFilePath); + } + } + } + else + fprintf(stderr, "pg_standby: archiveLocation \"%s\" open error\n", archiveLocation); + + closedir(xldir); + } + fflush(stderr); +} + +/* ===================================================================== + * End of Customizable section + * ===================================================================== + */ + +/* + * CheckForExternalTrigger() + * + * Is there a trigger file? + */ +static bool +CheckForExternalTrigger(void) +{ + int rc; + + /* + * Look for a trigger file, if that option has been selected + * + * We use stat() here because triggerPath is always a file + * rather than potentially being in an archive + */ + if (triggerPath && stat(triggerPath, &stat_buf) == 0) + { + fprintf(stderr, "trigger file found\n"); + fflush(stderr); + + /* + * If trigger file found, we *must* delete it. Here's why: + * When recovery completes, we will be asked again + * for the same file from the archive using pg_standby + * so must remove trigger file so we can reload file again + * and come up correctly. 
+ */ + rc = unlink(triggerPath); + if (rc != 0) + { + fprintf(stderr, "\n ERROR: unable to remove \"%s\", rc=%d", triggerPath, rc); + fflush(stderr); + exit(rc); + } + return true; + } + + return false; +} + +/* + * RestoreWALFileForRecovery() + * + * Perform the action required to restore the file from archive + */ +static bool +RestoreWALFileForRecovery(void) +{ + int rc = 0; + int numretries = 0; + + if (debug) + { + fprintf(stderr, "\nrunning restore :"); + fflush(stderr); + } + + while (numretries < maxretries) + { + rc = system(restoreCommand); + if (rc == 0) + { + if (debug) + { + fprintf(stderr, " success\n"); + fflush(stderr); + } + return true; + } + pg_usleep(numretries++ * sleeptime * 1000000L); + } + + /* + * Allow caller to add additional info + */ + if (debug) + fprintf(stderr, "not restored : "); + return false; +} + +static void +usage() +{ + fprintf(stderr, "\npg_standby allows Warm Standby servers to be configured\n"); + fprintf(stderr, "Usage:\n"); + fprintf(stderr, " pg_standby [OPTION]... [ARCHIVELOCATION] [NEXTWALFILE] [XLOGFILEPATH]\n"); + fprintf(stderr, " note space between [ARCHIVELOCATION] and [NEXTWALFILE]\n"); + fprintf(stderr, "with main intended use via restore_command in the recovery.conf\n"); + fprintf(stderr, " restore_command = 'pg_standby [OPTION]... [ARCHIVELOCATION] %%f %%p'\n"); + fprintf(stderr, "e.g. 
restore_command = 'pg_standby -l -u /mnt/server/archiverdir %%f %%p'\n"); + fprintf(stderr, "\nOptions:\n"); + fprintf(stderr, " -c copies file from archive (default)\n"); + fprintf(stderr, " -d generate lots of debugging output (testing only)\n"); + fprintf(stderr, " -k [NUMFILESTOKEEP] keeps history of # files in archives; unlinks/removes files beyond that\n"); + fprintf(stderr, " -l links into archive (leaves file in archive)\n"); + fprintf(stderr, " -t [TRIGGERFILE] defines a trigger file to initiate failover (no default)\n"); + fprintf(stderr, " -r [MAXRETRIES] maximum number of times to retry, with progressive wait (default=3)\n"); + fprintf(stderr, " -s [SLEEPTIME] number of seconds to wait between file checks (default=5)\n"); + fprintf(stderr, " -w [MAXWAITTIME] max number of seconds to wait for a file (0 disables)(default=0)\n"); + fflush(stderr); +} + +static void +sighandler(int sig) +{ + triggered = true; + signaled = true; +} + +/*------------ MAIN ----------------------------------------*/ +int +main(int argc, char **argv) +{ + int c; + + (void) signal(SIGINT, sighandler); + (void) signal(SIGQUIT, sighandler); + + while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1) + { + switch (c) + { + case 'c': /* Use copy */ + restoreCommandType = RESTORE_COMMAND_COPY; + break; + case 'd': /* Debug mode */ + debug = true; + break; + case 'k': /* keepfiles */ + keepfiles = atoi(optarg); + if (keepfiles <= 0) + { + fprintf(stderr, "usage: pg_standby -k keepfiles must be > 0\n"); + usage(); + exit(2); + } + break; + case 'l': /* Use link */ + restoreCommandType = RESTORE_COMMAND_LINK; + break; + case 'r': /* Retries */ + maxretries = atoi(optarg); + if (maxretries < 0) + { + fprintf(stderr, "usage: pg_standby -r maxretries must be > 0\n"); + usage(); + exit(2); + } + break; + case 's': /* Sleep time */ + sleeptime = atoi(optarg); + if (sleeptime <= 0 || sleeptime > 60) + { + fprintf(stderr, "usage: pg_standby -s sleeptime incorrectly set\n"); + usage(); + 
exit(2); + } + break; + case 't': /* Trigger file */ + triggerPath = optarg; + if (CheckForExternalTrigger()) + exit(1); /* Normal exit, with non-zero */ + break; + case 'w': /* Max wait time */ + maxwaittime = atoi(optarg); + if (maxwaittime < 0) + { + fprintf(stderr, "usage: pg_standby -w maxwaittime incorrectly set\n"); + usage(); + exit(2); + } + break; + default: + usage(); + exit(2); + break; + } + } + + /* + * Parameter checking - after checking to see if trigger file present + */ + if (argc == 1) + { + usage(); + exit(2); + } + + /* + * We will go to the archiveLocation to get nextWALFileName. + * nextWALFileName may not exist yet, which would not be an error, + * so we separate the archiveLocation and nextWALFileName so we can check + * separately whether archiveLocation exists, if not that is an error + */ + if (optind < argc) + { + archiveLocation = argv[optind]; + optind++; + } + else + { + fprintf(stderr, "pg_standby: must specify archiveLocation\n"); + usage(); + exit(2); + } + + if (optind < argc) + { + nextWALFileName = argv[optind]; + optind++; + } + else + { + fprintf(stderr, "pg_standby: use %%f to specify nextWALFileName\n"); + usage(); + exit(2); + } + + if (optind < argc) + { + xlogFilePath = argv[optind]; + optind++; + } + else + { + fprintf(stderr, "pg_standby: use %%p to specify xlogFilePath\n"); + usage(); + exit(2); + } + + CustomizableInitialize(); + + if (debug) + { + fprintf(stderr, "\nTrigger file : %s", triggerPath ? triggerPath : ""); + fprintf(stderr, "\nWaiting for WAL file : %s", WALFilePath); + fprintf(stderr, "\nWAL file path : %s", nextWALFileName); + fprintf(stderr, "\nRestoring to... : %s", xlogFilePath); + fprintf(stderr, "\nSleep interval : %d second%s", + sleeptime, (sleeptime > 1 ? "s" : " ")); + fprintf(stderr, "\nMax wait interval : %d %s", + maxwaittime, (maxwaittime > 0 ? 
"seconds" : "forever")); + fprintf(stderr, "\nCommand for restore : %s", restoreCommand); + if (keepfiles > 0) + fprintf(stderr, "\nNum archived files kept : last %d files", keepfiles); + else + fprintf(stderr, "\nNum archived files kept : all files"); + fflush(stderr); + } + + /* + * Check for initial history file: always the first file to be requested + * It's OK if the file isn't there - all other files need to wait + */ + if (strlen(nextWALFileName) > 8 && + strspn(nextWALFileName, "0123456789ABCDEF") == 8 && + strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".history"), + ".history") == 0) + { + nextWALFileType = XLOG_HISTORY; + if (RestoreWALFileForRecovery()) + exit(0); + else + { + if (debug) + { + fprintf(stderr, "history file not found\n"); + fflush(stderr); + } + exit(1); + } + } + + /* + * Main wait loop + */ + while (!CustomizableNextWALFileReady() && !triggered) + { + if (sleeptime <= 60) + pg_usleep(sleeptime * 1000000L); + + if (signaled) + { + if (debug) + { + fprintf(stderr, "\nsignaled to exit\n"); + fflush(stderr); + } + } + else + { + + if (debug) + { + fprintf(stderr, "\nWAL file not present yet."); + if (triggerPath) + fprintf(stderr, " Checking for trigger file..."); + fflush(stderr); + } + + waittime += sleeptime; + + if (!triggered && (CheckForExternalTrigger() || (waittime >= maxwaittime && maxwaittime > 0))) + { + triggered = true; + if (debug && waittime >= maxwaittime && maxwaittime > 0) + fprintf(stderr, "\nTimed out after %d seconds\n",waittime); + } + } + } + + /* + * Action on exit + */ + if (triggered) + exit(1); /* Normal exit, with non-zero */ + else + { + /* + * Once we have restored this file successfully we + * can remove some prior WAL files. + * If this restore fails we musn't remove any + * file because some of them will be requested again + * immediately after the failed restore, or when + * we restart recovery. + */ + if (RestoreWALFileForRecovery()) + CustomizableCleanupPriorWALFiles(); + exit(0); + } +}