mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-21 08:29:39 +08:00
8fd733bd19
file to be a symlink. We tried to fix this issue with an earlier server-side patch, but it didn't fix the whole issue. The same bug is present in older releases as well, but the 8.4 train is about to leave the station, and I'm not sure if we have consensus on whether we can remove the -l option in back-branches or whether we need to attempt a server-side fix to make symlinking safe. Patch by Simon Riggs, per discussion on bug identified by Fujii Masao.
831 lines
22 KiB
C
831 lines
22 KiB
C
/*
|
|
* $PostgreSQL: pgsql/contrib/pg_standby/pg_standby.c,v 1.25 2009/06/25 12:03:10 heikki Exp $
|
|
*
|
|
*
|
|
* pg_standby.c
|
|
*
|
|
* Production-ready example of how to create a Warm Standby
|
|
* database server using continuous archiving as a
|
|
* replication mechanism
|
|
*
|
|
* We separate the parameters for archive and nextWALfile
|
|
* so that we can check the archive exists, even if the
|
|
* WAL file doesn't (yet).
|
|
*
|
|
* This program will be executed once in full for each file
|
|
* requested by the warm standby server.
|
|
*
|
|
* It is designed to cater to a variety of needs, as well as
|
|
* providing a customizable section.
|
|
*
|
|
* Original author: Simon Riggs simon@2ndquadrant.com
|
|
* Current maintainer: Simon Riggs
|
|
*/
|
|
#include "postgres_fe.h"
|
|
|
|
#include <ctype.h>
|
|
#include <dirent.h>
|
|
#include <sys/stat.h>
|
|
#include <fcntl.h>
|
|
#include <signal.h>
|
|
|
|
#ifdef WIN32
|
|
int getopt(int argc, char *const argv[], const char *optstring);
|
|
#else
|
|
#include <sys/time.h>
|
|
#include <unistd.h>
|
|
|
|
#ifdef HAVE_GETOPT_H
|
|
#include <getopt.h>
|
|
#endif
|
|
#endif /* ! WIN32 */
|
|
|
|
extern char *optarg;
|
|
extern int optind;
|
|
|
|
const char *progname;
|
|
|
|
/* Options and defaults */
|
|
int sleeptime = 5; /* amount of time to sleep between file checks */
|
|
int waittime = -1; /* how long we have been waiting, -1 no wait
|
|
* yet */
|
|
int maxwaittime = 0; /* how long are we prepared to wait for? */
|
|
int keepfiles = 0; /* number of WAL files to keep, 0 keep all */
|
|
int maxretries = 3; /* number of retries on restore command */
|
|
bool debug = false; /* are we debugging? */
|
|
bool need_cleanup = false; /* do we need to remove files from
|
|
* archive? */
|
|
|
|
static volatile sig_atomic_t signaled = false;
|
|
|
|
char *archiveLocation; /* where to find the archive? */
|
|
char *triggerPath; /* where to find the trigger file? */
|
|
char *xlogFilePath; /* where we are going to restore to */
|
|
char *nextWALFileName; /* the file we need to get from archive */
|
|
char *restartWALFileName; /* the file from which we can restart restore */
|
|
char *priorWALFileName; /* the file we need to get from archive */
|
|
char WALFilePath[MAXPGPATH]; /* the file path including archive */
|
|
char restoreCommand[MAXPGPATH]; /* run this to restore */
|
|
char exclusiveCleanupFileName[MAXPGPATH]; /* the file we need to
|
|
* get from archive */
|
|
|
|
/*
|
|
* Two types of failover are supported (smart and fast failover).
|
|
*
|
|
* The content of the trigger file determines the type of failover. If the
|
|
* trigger file contains the word "smart" (or the file is empty), smart
|
|
* failover is chosen: pg_standby acts as cp or ln command itself, on
|
|
* successful completion all the available WAL records will be applied
|
|
* resulting in zero data loss. But, it might take a long time to finish
|
|
* recovery if there's a lot of unapplied WAL.
|
|
*
|
|
* On the other hand, if the trigger file contains the word "fast", the
|
|
* recovery is finished immediately even if unapplied WAL files remain. Any
|
|
* transactions in the unapplied WAL files are lost.
|
|
*
|
|
* An empty trigger file performs smart failover. SIGUSR1 or SIGINT triggers
|
|
* fast failover. A timeout causes fast failover (smart failover would have
|
|
* the same effect, since if the timeout is reached there is no unapplied WAL).
|
|
*/
|
|
/* Values of the Failover state variable */
#define NoFailover		0
#define SmartFailover	1
#define FastFailover	2

static int	Failover = NoFailover;

/* Values of restoreCommandType: how the file is brought out of the archive */
#define RESTORE_COMMAND_COPY 0
#define RESTORE_COMMAND_LINK 1
int			restoreCommandType;

/* Values of nextWALFileType: what kind of file has been requested */
#define XLOG_DATA			 0
#define XLOG_HISTORY		 1
#define XLOG_BACKUP_LABEL	 2
int			nextWALFileType;

/*
 * Format restoreCommand as: <program> "<first>" "<second>".
 * 'program' must be a string literal because it is pasted onto the format
 * string at compile time.
 */
#define SET_RESTORE_COMMAND(program, first, second) \
	snprintf(restoreCommand, MAXPGPATH, program " \"%s\" \"%s\"", first, second)

/* Scratch buffer shared by every stat() call in this file */
struct stat stat_buf;
|
|
|
|
/* =====================================================================
|
|
*
|
|
* Customizable section
|
|
*
|
|
* =====================================================================
|
|
*
|
|
* Currently, this section assumes that the Archive is a locally
|
|
* accessible directory. If you want to make other assumptions,
|
|
* such as using a vendor-specific archive and access API, these
|
|
* routines are the ones you'll need to change. You're
|
|
* encouraged to submit any changes to pgsql-hackers@postgresql.org
|
|
* or personally to the current maintainer. Those changes may be
|
|
* folded in to later versions of this program.
|
|
*/
|
|
|
|
/* Length of a WAL segment file name: three 8-digit hexadecimal fields */
#define XLOG_DATA_FNAME_LEN		24

/*
 * Reworked from access/xlog_internal.h
 *
 * Writes the file name built from timeline, log and segment numbers into
 * 'dest', which must have room for XLOG_DATA_FNAME_LEN + 1 bytes.
 */
#define XLogFileName(dest, timeline, logno, segno) \
	snprintf(dest, XLOG_DATA_FNAME_LEN + 1, "%08X%08X%08X", timeline, logno, segno)
|
|
|
|
/*
|
|
* Initialize allows customized commands into the warm standby program.
|
|
*
|
|
* As an example, and probably the common case, we use either
|
|
* cp/ln commands on *nix, or copy/move command on Windows.
|
|
*/
|
|
static void
|
|
CustomizableInitialize(void)
|
|
{
|
|
#ifdef WIN32
|
|
snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName);
|
|
switch (restoreCommandType)
|
|
{
|
|
case RESTORE_COMMAND_LINK:
|
|
SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath);
|
|
break;
|
|
case RESTORE_COMMAND_COPY:
|
|
default:
|
|
SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath);
|
|
break;
|
|
}
|
|
#else
|
|
snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName);
|
|
switch (restoreCommandType)
|
|
{
|
|
case RESTORE_COMMAND_LINK:
|
|
#if HAVE_WORKING_LINK
|
|
SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath);
|
|
break;
|
|
#endif
|
|
case RESTORE_COMMAND_COPY:
|
|
default:
|
|
SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* This code assumes that archiveLocation is a directory You may wish to
|
|
* add code to check for tape libraries, etc.. So, since it is a
|
|
* directory, we use stat to test if its accessible
|
|
*/
|
|
if (stat(archiveLocation, &stat_buf) != 0)
|
|
{
|
|
fprintf(stderr, "%s: archiveLocation \"%s\" does not exist\n", progname, archiveLocation);
|
|
fflush(stderr);
|
|
exit(2);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* CustomizableNextWALFileReady()
|
|
*
|
|
* Is the requested file ready yet?
|
|
*/
|
|
static bool
|
|
CustomizableNextWALFileReady()
|
|
{
|
|
if (stat(WALFilePath, &stat_buf) == 0)
|
|
{
|
|
/*
|
|
* If its a backup file, return immediately If its a regular file
|
|
* return only if its the right size already
|
|
*/
|
|
if (strlen(nextWALFileName) > 24 &&
|
|
strspn(nextWALFileName, "0123456789ABCDEF") == 24 &&
|
|
strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".backup"),
|
|
".backup") == 0)
|
|
{
|
|
nextWALFileType = XLOG_BACKUP_LABEL;
|
|
return true;
|
|
}
|
|
else if (stat_buf.st_size == XLOG_SEG_SIZE)
|
|
{
|
|
#ifdef WIN32
|
|
|
|
/*
|
|
* Windows 'cp' sets the final file size before the copy is
|
|
* complete, and not yet ready to be opened by pg_standby. So we
|
|
* wait for sleeptime secs before attempting to restore. If that
|
|
* is not enough, we will rely on the retry/holdoff mechanism.
|
|
* GNUWin32's cp does not have this problem.
|
|
*/
|
|
pg_usleep(sleeptime * 1000000L);
|
|
#endif
|
|
nextWALFileType = XLOG_DATA;
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* If still too small, wait until it is the correct size
|
|
*/
|
|
if (stat_buf.st_size > XLOG_SEG_SIZE)
|
|
{
|
|
if (debug)
|
|
{
|
|
fprintf(stderr, "file size greater than expected\n");
|
|
fflush(stderr);
|
|
}
|
|
exit(3);
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Number of XLOG_SEG_SIZE-sized segments that make up one log file */
#define MaxSegmentsPerLogFile (0xFFFFFFFF / XLOG_SEG_SIZE)
|
|
|
|
static void
|
|
CustomizableCleanupPriorWALFiles(void)
|
|
{
|
|
/*
|
|
* Work out name of prior file from current filename
|
|
*/
|
|
if (nextWALFileType == XLOG_DATA)
|
|
{
|
|
int rc;
|
|
DIR *xldir;
|
|
struct dirent *xlde;
|
|
|
|
/*
|
|
* Assume its OK to keep failing. The failure situation may change
|
|
* over time, so we'd rather keep going on the main processing than
|
|
* fail because we couldnt clean up yet.
|
|
*/
|
|
if ((xldir = opendir(archiveLocation)) != NULL)
|
|
{
|
|
while ((xlde = readdir(xldir)) != NULL)
|
|
{
|
|
/*
|
|
* We ignore the timeline part of the XLOG segment identifiers
|
|
* in deciding whether a segment is still needed. This
|
|
* ensures that we won't prematurely remove a segment from a
|
|
* parent timeline. We could probably be a little more
|
|
* proactive about removing segments of non-parent timelines,
|
|
* but that would be a whole lot more complicated.
|
|
*
|
|
* We use the alphanumeric sorting property of the filenames
|
|
* to decide which ones are earlier than the
|
|
* exclusiveCleanupFileName file. Note that this means files
|
|
* are not removed in the order they were originally written,
|
|
* in case this worries you.
|
|
*/
|
|
if (strlen(xlde->d_name) == XLOG_DATA_FNAME_LEN &&
|
|
strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_DATA_FNAME_LEN &&
|
|
strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0)
|
|
{
|
|
#ifdef WIN32
|
|
snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, xlde->d_name);
|
|
#else
|
|
snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, xlde->d_name);
|
|
#endif
|
|
|
|
if (debug)
|
|
fprintf(stderr, "\nremoving \"%s\"", WALFilePath);
|
|
|
|
rc = unlink(WALFilePath);
|
|
if (rc != 0)
|
|
{
|
|
fprintf(stderr, "\n%s: ERROR failed to remove \"%s\": %s",
|
|
progname, WALFilePath, strerror(errno));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (debug)
|
|
fprintf(stderr, "\n");
|
|
}
|
|
else
|
|
fprintf(stderr, "%s: archiveLocation \"%s\" open error\n", progname, archiveLocation);
|
|
|
|
closedir(xldir);
|
|
fflush(stderr);
|
|
}
|
|
}
|
|
|
|
/* =====================================================================
|
|
* End of Customizable section
|
|
* =====================================================================
|
|
*/
|
|
|
|
/*
|
|
* SetWALFileNameForCleanup()
|
|
*
|
|
* Set the earliest WAL filename that we want to keep on the archive
|
|
* and decide whether we need_cleanup
|
|
*/
|
|
static bool
|
|
SetWALFileNameForCleanup(void)
|
|
{
|
|
uint32 tli = 1,
|
|
log = 0,
|
|
seg = 0;
|
|
uint32 log_diff = 0,
|
|
seg_diff = 0;
|
|
bool cleanup = false;
|
|
|
|
if (restartWALFileName)
|
|
{
|
|
/*
|
|
* Don't do cleanup if the restartWALFileName provided is later than
|
|
* the xlog file requested. This is an error and we must not remove
|
|
* these files from archive. This shouldn't happen, but better safe
|
|
* than sorry.
|
|
*/
|
|
if (strcmp(restartWALFileName, nextWALFileName) > 0)
|
|
return false;
|
|
|
|
strcpy(exclusiveCleanupFileName, restartWALFileName);
|
|
return true;
|
|
}
|
|
|
|
if (keepfiles > 0)
|
|
{
|
|
sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg);
|
|
if (tli > 0 && log >= 0 && seg > 0)
|
|
{
|
|
log_diff = keepfiles / MaxSegmentsPerLogFile;
|
|
seg_diff = keepfiles % MaxSegmentsPerLogFile;
|
|
if (seg_diff > seg)
|
|
{
|
|
log_diff++;
|
|
seg = MaxSegmentsPerLogFile - (seg_diff - seg);
|
|
}
|
|
else
|
|
seg -= seg_diff;
|
|
|
|
if (log >= log_diff)
|
|
{
|
|
log -= log_diff;
|
|
cleanup = true;
|
|
}
|
|
else
|
|
{
|
|
log = 0;
|
|
seg = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
XLogFileName(exclusiveCleanupFileName, tli, log, seg);
|
|
|
|
return cleanup;
|
|
}
|
|
|
|
/*
|
|
* CheckForExternalTrigger()
|
|
*
|
|
* Is there a trigger file? Sets global 'Failover' variable to indicate
|
|
* what kind of a trigger file it was. A "fast" trigger file is turned
|
|
* into a "smart" file as a side-effect.
|
|
*/
|
|
static void
|
|
CheckForExternalTrigger(void)
|
|
{
|
|
char buf[32];
|
|
int fd;
|
|
int len;
|
|
|
|
/*
|
|
* Look for a trigger file, if that option has been selected
|
|
*
|
|
* We use stat() here because triggerPath is always a file rather than
|
|
* potentially being in an archive
|
|
*/
|
|
if (!triggerPath || stat(triggerPath, &stat_buf) != 0)
|
|
return;
|
|
|
|
/*
|
|
* An empty trigger file performs smart failover. There's a little race
|
|
* condition here: if the writer of the trigger file has just created the
|
|
* file, but not yet written anything to it, we'll treat that as smart
|
|
* shutdown even if the other process was just about to write "fast" to
|
|
* it. But that's fine: we'll restore one more WAL file, and when we're
|
|
* invoked next time, we'll see the word "fast" and fail over immediately.
|
|
*/
|
|
if (stat_buf.st_size == 0)
|
|
{
|
|
Failover = SmartFailover;
|
|
fprintf(stderr, "trigger file found: smart failover\n");
|
|
fflush(stderr);
|
|
return;
|
|
}
|
|
|
|
if ((fd = open(triggerPath, O_RDWR, 0)) < 0)
|
|
{
|
|
fprintf(stderr, "WARNING: could not open \"%s\": %s\n",
|
|
triggerPath, strerror(errno));
|
|
fflush(stderr);
|
|
return;
|
|
}
|
|
|
|
if ((len = read(fd, buf, sizeof(buf))) < 0)
|
|
{
|
|
fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
|
|
triggerPath, strerror(errno));
|
|
fflush(stderr);
|
|
close(fd);
|
|
return;
|
|
}
|
|
buf[len] = '\0';
|
|
|
|
if (strncmp(buf, "smart", 5) == 0)
|
|
{
|
|
Failover = SmartFailover;
|
|
fprintf(stderr, "trigger file found: smart failover\n");
|
|
fflush(stderr);
|
|
close(fd);
|
|
return;
|
|
}
|
|
|
|
if (strncmp(buf, "fast", 4) == 0)
|
|
{
|
|
Failover = FastFailover;
|
|
|
|
fprintf(stderr, "trigger file found: fast failover\n");
|
|
fflush(stderr);
|
|
|
|
/*
|
|
* Turn it into a "smart" trigger by truncating the file. Otherwise if
|
|
* the server asks us again to restore a segment that was restored
|
|
* already, we would return "not found" and upset the server.
|
|
*/
|
|
if (ftruncate(fd, 0) < 0)
|
|
{
|
|
fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
|
|
triggerPath, strerror(errno));
|
|
fflush(stderr);
|
|
}
|
|
close(fd);
|
|
|
|
return;
|
|
}
|
|
close(fd);
|
|
|
|
fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath);
|
|
fflush(stderr);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* RestoreWALFileForRecovery()
|
|
*
|
|
* Perform the action required to restore the file from archive
|
|
*/
|
|
static bool
|
|
RestoreWALFileForRecovery(void)
|
|
{
|
|
int rc = 0;
|
|
int numretries = 0;
|
|
|
|
if (debug)
|
|
{
|
|
fprintf(stderr, "running restore :");
|
|
fflush(stderr);
|
|
}
|
|
|
|
while (numretries <= maxretries)
|
|
{
|
|
rc = system(restoreCommand);
|
|
if (rc == 0)
|
|
{
|
|
if (debug)
|
|
{
|
|
fprintf(stderr, " OK\n");
|
|
fflush(stderr);
|
|
}
|
|
return true;
|
|
}
|
|
pg_usleep(numretries++ * sleeptime * 1000000L);
|
|
}
|
|
|
|
/*
|
|
* Allow caller to add additional info
|
|
*/
|
|
if (debug)
|
|
fprintf(stderr, "not restored\n");
|
|
return false;
|
|
}
|
|
|
|
static void
|
|
usage(void)
|
|
{
|
|
printf("%s allows PostgreSQL warm standby servers to be configured.\n\n", progname);
|
|
printf("Usage:\n");
|
|
printf(" %s [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname);
|
|
printf("\n"
|
|
"with main intended use as a restore_command in the recovery.conf:\n"
|
|
" restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n"
|
|
"e.g.\n"
|
|
" restore_command = 'pg_standby -l /mnt/server/archiverdir %%f %%p %%r'\n");
|
|
printf("\nOptions:\n");
|
|
printf(" -c copies file from archive (default)\n");
|
|
printf(" -d generate lots of debugging output (testing only)\n");
|
|
printf(" -k NUMFILESTOKEEP if RESTARTWALFILE not used, removes files prior to limit\n"
|
|
" (0 keeps all)\n");
|
|
printf(" -l does nothing; use of link is now deprecated\n");
|
|
printf(" -r MAXRETRIES max number of times to retry, with progressive wait\n"
|
|
" (default=3)\n");
|
|
printf(" -s SLEEPTIME seconds to wait between file checks (min=1, max=60,\n"
|
|
" default=5)\n");
|
|
printf(" -t TRIGGERFILE defines a trigger file to initiate failover (no default)\n");
|
|
printf(" -w MAXWAITTIME max seconds to wait for a file (0=no limit) (default=0)\n");
|
|
printf(" --help show this help, then exit\n");
|
|
printf(" --version output version information, then exit\n");
|
|
printf("\nReport bugs to <pgsql-bugs@postgresql.org>.\n");
|
|
}
|
|
|
|
static void
|
|
sighandler(int sig)
|
|
{
|
|
signaled = true;
|
|
}
|
|
|
|
#ifndef WIN32
/*
 * sigquit_handler()
 *
 * We don't want SIGQUIT to core dump, so instead restore the default
 * action for SIGINT and send SIGINT to our own process.
 */
static void
sigquit_handler(int sig)
{
	(void) sig;					/* the signal number is not needed */

	signal(SIGINT, SIG_DFL);
	kill(getpid(), SIGINT);
}
#endif
|
|
|
|
/*------------ MAIN ----------------------------------------*/
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
int c;
|
|
|
|
progname = get_progname(argv[0]);
|
|
|
|
if (argc > 1)
|
|
{
|
|
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
|
|
{
|
|
usage();
|
|
exit(0);
|
|
}
|
|
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
|
|
{
|
|
puts("pg_standby (PostgreSQL) " PG_VERSION);
|
|
exit(0);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* You can send SIGUSR1 to trigger failover.
|
|
*
|
|
* Postmaster uses SIGQUIT to request immediate shutdown. The default
|
|
* action is to core dump, but we don't want that, so trap it and commit
|
|
* suicide without core dump.
|
|
*
|
|
* We used to use SIGINT and SIGQUIT to trigger failover, but that turned
|
|
* out to be a bad idea because postmaster uses SIGQUIT to request
|
|
* immediate shutdown. We still trap SIGINT, but that may change in a
|
|
* future release.
|
|
*/
|
|
(void) signal(SIGUSR1, sighandler);
|
|
(void) signal(SIGINT, sighandler); /* deprecated, use SIGUSR1 */
|
|
#ifndef WIN32
|
|
(void) signal(SIGQUIT, sigquit_handler);
|
|
#endif
|
|
|
|
while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1)
|
|
{
|
|
switch (c)
|
|
{
|
|
case 'c': /* Use copy */
|
|
restoreCommandType = RESTORE_COMMAND_COPY;
|
|
break;
|
|
case 'd': /* Debug mode */
|
|
debug = true;
|
|
break;
|
|
case 'k': /* keepfiles */
|
|
keepfiles = atoi(optarg);
|
|
if (keepfiles < 0)
|
|
{
|
|
fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname);
|
|
exit(2);
|
|
}
|
|
break;
|
|
case 'l': /* Use link */
|
|
/*
|
|
* Link feature disabled, possibly permanently. Linking
|
|
* causes a problem after recovery ends that is not currently
|
|
* resolved by PostgreSQL. 25 Jun 2009
|
|
restoreCommandType = RESTORE_COMMAND_LINK;
|
|
*/
|
|
break;
|
|
case 'r': /* Retries */
|
|
maxretries = atoi(optarg);
|
|
if (maxretries < 0)
|
|
{
|
|
fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname);
|
|
exit(2);
|
|
}
|
|
break;
|
|
case 's': /* Sleep time */
|
|
sleeptime = atoi(optarg);
|
|
if (sleeptime <= 0 || sleeptime > 60)
|
|
{
|
|
fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname);
|
|
exit(2);
|
|
}
|
|
break;
|
|
case 't': /* Trigger file */
|
|
triggerPath = optarg;
|
|
break;
|
|
case 'w': /* Max wait time */
|
|
maxwaittime = atoi(optarg);
|
|
if (maxwaittime < 0)
|
|
{
|
|
fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname);
|
|
exit(2);
|
|
}
|
|
break;
|
|
default:
|
|
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
|
|
exit(2);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Parameter checking - after checking to see if trigger file present
|
|
*/
|
|
if (argc == 1)
|
|
{
|
|
fprintf(stderr, "%s: not enough command-line arguments\n", progname);
|
|
exit(2);
|
|
}
|
|
|
|
/*
|
|
* We will go to the archiveLocation to get nextWALFileName.
|
|
* nextWALFileName may not exist yet, which would not be an error, so we
|
|
* separate the archiveLocation and nextWALFileName so we can check
|
|
* separately whether archiveLocation exists, if not that is an error
|
|
*/
|
|
if (optind < argc)
|
|
{
|
|
archiveLocation = argv[optind];
|
|
optind++;
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr, "%s: must specify archive location\n", progname);
|
|
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
|
|
exit(2);
|
|
}
|
|
|
|
if (optind < argc)
|
|
{
|
|
nextWALFileName = argv[optind];
|
|
optind++;
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr, "%s: use %%f to specify nextWALFileName\n", progname);
|
|
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
|
|
exit(2);
|
|
}
|
|
|
|
if (optind < argc)
|
|
{
|
|
xlogFilePath = argv[optind];
|
|
optind++;
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr, "%s: use %%p to specify xlogFilePath\n", progname);
|
|
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
|
|
exit(2);
|
|
}
|
|
|
|
if (optind < argc)
|
|
{
|
|
restartWALFileName = argv[optind];
|
|
optind++;
|
|
}
|
|
|
|
CustomizableInitialize();
|
|
|
|
need_cleanup = SetWALFileNameForCleanup();
|
|
|
|
if (debug)
|
|
{
|
|
fprintf(stderr, "Trigger file : %s\n", triggerPath ? triggerPath : "<not set>");
|
|
fprintf(stderr, "Waiting for WAL file : %s\n", nextWALFileName);
|
|
fprintf(stderr, "WAL file path : %s\n", WALFilePath);
|
|
fprintf(stderr, "Restoring to : %s\n", xlogFilePath);
|
|
fprintf(stderr, "Sleep interval : %d second%s\n",
|
|
sleeptime, (sleeptime > 1 ? "s" : " "));
|
|
fprintf(stderr, "Max wait interval : %d %s\n",
|
|
maxwaittime, (maxwaittime > 0 ? "seconds" : "forever"));
|
|
fprintf(stderr, "Command for restore : %s\n", restoreCommand);
|
|
fprintf(stderr, "Keep archive history : ");
|
|
if (need_cleanup)
|
|
fprintf(stderr, "%s and later\n", exclusiveCleanupFileName);
|
|
else
|
|
fprintf(stderr, "No cleanup required\n");
|
|
fflush(stderr);
|
|
}
|
|
|
|
/*
|
|
* Check for initial history file: always the first file to be requested
|
|
* It's OK if the file isn't there - all other files need to wait
|
|
*/
|
|
if (strlen(nextWALFileName) > 8 &&
|
|
strspn(nextWALFileName, "0123456789ABCDEF") == 8 &&
|
|
strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".history"),
|
|
".history") == 0)
|
|
{
|
|
nextWALFileType = XLOG_HISTORY;
|
|
if (RestoreWALFileForRecovery())
|
|
exit(0);
|
|
else
|
|
{
|
|
if (debug)
|
|
{
|
|
fprintf(stderr, "history file not found\n");
|
|
fflush(stderr);
|
|
}
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Main wait loop
|
|
*/
|
|
for (;;)
|
|
{
|
|
/* Check for trigger file or signal first */
|
|
CheckForExternalTrigger();
|
|
if (signaled)
|
|
{
|
|
Failover = FastFailover;
|
|
if (debug)
|
|
{
|
|
fprintf(stderr, "signaled to exit: fast failover\n");
|
|
fflush(stderr);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Check for fast failover immediately, before checking if the
|
|
* requested WAL file is available
|
|
*/
|
|
if (Failover == FastFailover)
|
|
exit(1);
|
|
|
|
if (CustomizableNextWALFileReady())
|
|
{
|
|
/*
|
|
* Once we have restored this file successfully we can remove some
|
|
* prior WAL files. If this restore fails we musn't remove any
|
|
* file because some of them will be requested again immediately
|
|
* after the failed restore, or when we restart recovery.
|
|
*/
|
|
if (RestoreWALFileForRecovery())
|
|
{
|
|
if (need_cleanup)
|
|
CustomizableCleanupPriorWALFiles();
|
|
|
|
exit(0);
|
|
}
|
|
else
|
|
{
|
|
/* Something went wrong in copying the file */
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
/* Check for smart failover if the next WAL file was not available */
|
|
if (Failover == SmartFailover)
|
|
exit(1);
|
|
|
|
if (sleeptime <= 60)
|
|
pg_usleep(sleeptime * 1000000L);
|
|
|
|
waittime += sleeptime;
|
|
if (waittime >= maxwaittime && maxwaittime > 0)
|
|
{
|
|
Failover = FastFailover;
|
|
if (debug)
|
|
{
|
|
fprintf(stderr, "Timed out after %d seconds: fast failover\n",
|
|
waittime);
|
|
fflush(stderr);
|
|
}
|
|
}
|
|
if (debug)
|
|
{
|
|
fprintf(stderr, "WAL file not present yet.");
|
|
if (triggerPath)
|
|
fprintf(stderr, " Checking for trigger file...");
|
|
fprintf(stderr, "\n");
|
|
fflush(stderr);
|
|
}
|
|
}
|
|
}
|