pg_ctl promote

Fujii Masao, reviewed by Robert Haas, Stephen Frost, and Magnus Hagander.
This commit is contained in:
Robert Haas 2011-02-15 21:28:48 -05:00
parent 8ddc05fb01
commit 4695da5ae9
7 changed files with 152 additions and 21 deletions

View File

@ -615,8 +615,9 @@ protocol to make nodes agree on a serializable transactional order.
</para>
<para>
Standby mode is exited and the server switches to normal operation,
when a trigger file is found (<varname>trigger_file</>). Before failover,
Standby mode is exited and the server switches to normal operation
when <command>pg_ctl promote</> is run or a trigger file is found
(<varname>trigger_file</>). Before failover,
any WAL immediately available in the archive or in <filename>pg_xlog</> will be
restored, but no attempt is made to connect to the master.
</para>
@ -685,11 +686,7 @@ protocol to make nodes agree on a serializable transactional order.
If you're setting up the standby server for high availability purposes,
set up WAL archiving, connections and authentication like the primary
server, because the standby server will work as a primary server after
failover. You will also need to set <varname>trigger_file</> to make
it possible to fail over.
If you're setting up the standby server for reporting
purposes, with no plans to fail over to it, <varname>trigger_file</>
is not required.
failover.
</para>
<para>
@ -710,7 +707,6 @@ protocol to make nodes agree on a serializable transactional order.
standby_mode = 'on'
primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass'
restore_command = 'cp /path/to/archive/%f %p'
trigger_file = '/path/to/trigger_file'
archive_cleanup_command = 'pg_archivecleanup /path/to/archive %r'
</programlisting>
</para>
@ -949,13 +945,15 @@ primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass'
</para>
<para>
To trigger failover of a log-shipping standby server, create a trigger
To trigger failover of a log-shipping standby server,
run <command>pg_ctl promote</> or create a trigger
file with the filename and path specified by the <varname>trigger_file</>
setting in <filename>recovery.conf</>. If <varname>trigger_file</> is
not given, there is no way to exit recovery in the standby and promote
it to a master. That can be useful for e.g reporting servers that are
setting in <filename>recovery.conf</>. If you're planning to use
<command>pg_ctl promote</> to fail over, <varname>trigger_file</> is
not required. If you're setting up reporting servers that are
only used to offload read-only queries from the primary, not for high
availability purposes.
availability purposes, you don't need to exit recovery in the standby
and promote it to a master.
</para>
</sect1>

View File

@ -343,8 +343,8 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
<listitem>
<para>
Specifies a trigger file whose presence ends recovery in the
standby. If no trigger file is specified, the standby never exits
recovery.
standby. Even if this value is not set, you can still promote
the standby using <command>pg_ctl promote</>.
This setting has no effect if <varname>standby_mode</> is <literal>off</>.
</para>
</listitem>

View File

@ -75,6 +75,13 @@ PostgreSQL documentation
<arg>-o <replaceable>options</replaceable></arg>
</cmdsynopsis>
<cmdsynopsis>
<command>pg_ctl</command>
<arg choice="plain">promote</arg>
<arg>-s</arg>
<arg>-D <replaceable>datadir</replaceable></arg>
</cmdsynopsis>
<cmdsynopsis>
<command>pg_ctl</command>
<arg choice="plain">reload</arg>
@ -183,6 +190,12 @@ PostgreSQL documentation
command-line options.
</para>
<para>
In <option>promote</option> mode, the standby server that is
running in the specified data directory is commanded to exit
recovery and begin read-write operations.
</para>
<para>
<option>reload</option> mode simply sends the
<command>postgres</command> process a <systemitem>SIGHUP</>

View File

@ -62,6 +62,7 @@
/* File path names (all relative to $PGDATA) */
#define RECOVERY_COMMAND_FILE "recovery.conf"
#define RECOVERY_COMMAND_DONE "recovery.done"
#define PROMOTE_SIGNAL_FILE "promote"
/* User-settable parameters */
@ -565,6 +566,7 @@ typedef struct xl_restore_point
*/
static volatile sig_atomic_t got_SIGHUP = false;
static volatile sig_atomic_t shutdown_requested = false;
static volatile sig_atomic_t promote_triggered = false;
/*
* Flag set when executing a restore command, to tell SIGTERM signal handler
@ -9669,6 +9671,14 @@ StartupProcSigUsr1Handler(SIGNAL_ARGS)
latch_sigusr1_handler();
}
/*
 * SIGUSR2: set flag to finish recovery
 *
 * Records the promote request in promote_triggered and then calls
 * WakeupRecovery() so the request is noticed.  errno is saved on entry and
 * restored on exit so that whatever code was interrupted by the signal never
 * observes a value changed by work done in this handler.
 */
static void
StartupProcTriggerHandler(SIGNAL_ARGS)
{
	int			save_errno = errno;

	promote_triggered = true;
	WakeupRecovery();

	errno = save_errno;
}
/* SIGHUP: set flag to re-read config file at next convenient time */
static void
StartupProcSigHupHandler(SIGNAL_ARGS)
@ -9746,7 +9756,7 @@ StartupProcessMain(void)
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, StartupProcSigUsr1Handler);
pqsignal(SIGUSR2, SIG_IGN);
pqsignal(SIGUSR2, StartupProcTriggerHandler);
/*
* Reset some signals that are accepted by postmaster but not here
@ -10192,9 +10202,9 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
}
/*
* Check to see if the trigger file exists. If it does, request postmaster
* to shut down walreceiver, wait for it to exit, remove the trigger
* file, and return true.
* Check to see whether the user-specified trigger file exists and whether a
* promote request has arrived. If either condition holds, request postmaster
* to shut down walreceiver, wait for it to exit, and return true.
*/
static bool
CheckForStandbyTrigger(void)
@ -10205,6 +10215,16 @@ CheckForStandbyTrigger(void)
if (triggered)
return true;
if (promote_triggered)
{
ereport(LOG,
(errmsg("received promote request")));
ShutdownWalRcv();
promote_triggered = false;
triggered = true;
return true;
}
if (TriggerFile == NULL)
return false;
@ -10220,6 +10240,27 @@ CheckForStandbyTrigger(void)
return false;
}
/*
 * Report whether a promote request is pending.
 *
 * A request is signalled by the existence of PROMOTE_SIGNAL_FILE.  When the
 * file is present it is removed and true is returned; otherwise false is
 * returned.  Should be called by postmaster after receiving SIGUSR1.
 */
bool
CheckPromoteSignal(void)
{
	struct stat promote_stat;

	/* No signal file means nobody has asked for promotion. */
	if (stat(PROMOTE_SIGNAL_FILE, &promote_stat) != 0)
		return false;

	/*
	 * We run inside a signal handler, where calling elog is not safe, so a
	 * failure of unlink is deliberately ignored without any report.
	 */
	unlink(PROMOTE_SIGNAL_FILE);

	return true;
}
/*
* Wake up startup process to replay newly arrived WAL, or to notice that
* failover has been requested.

View File

@ -4284,6 +4284,14 @@ sigusr1_handler(SIGNAL_ARGS)
WalReceiverPID = StartWalReceiver();
}
if (CheckPromoteSignal() && StartupPID != 0 &&
(pmState == PM_STARTUP || pmState == PM_RECOVERY ||
pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY))
{
/* Tell startup process to finish recovery */
signal_child(StartupPID, SIGUSR2);
}
PG_SETMASK(&UnBlockSig);
errno = save_errno;

View File

@ -62,6 +62,7 @@ typedef enum
START_COMMAND,
STOP_COMMAND,
RESTART_COMMAND,
PROMOTE_COMMAND,
RELOAD_COMMAND,
STATUS_COMMAND,
KILL_COMMAND,
@ -96,6 +97,7 @@ static char postopts_file[MAXPGPATH];
static char pid_file[MAXPGPATH];
static char backup_file[MAXPGPATH];
static char recovery_file[MAXPGPATH];
static char promote_file[MAXPGPATH];
#if defined(WIN32) || defined(__CYGWIN__)
static DWORD pgctl_start_type = SERVICE_AUTO_START;
@ -124,6 +126,7 @@ static void do_init(void);
static void do_start(void);
static void do_stop(void);
static void do_restart(void);
static void do_promote(void);
static void do_reload(void);
static void do_status(void);
static void do_kill(pgpid_t pid);
@ -872,7 +875,7 @@ do_stop(void)
/*
* restart/reload routines
* restart/promote/reload routines
*/
static void
@ -965,6 +968,66 @@ do_restart(void)
do_start();
}
static void
do_promote(void)
{
FILE *prmfile;
pgpid_t pid;
struct stat statbuf;
pid = get_pgpid();
if (pid == 0) /* no pid file */
{
write_stderr(_("%s: PID file \"%s\" does not exist\n"), progname, pid_file);
write_stderr(_("Is server running?\n"));
exit(1);
}
else if (pid < 0) /* standalone backend, not postmaster */
{
pid = -pid;
write_stderr(_("%s: cannot promote server; "
"single-user server is running (PID: %ld)\n"),
progname, pid);
exit(1);
}
/* If recovery.conf doesn't exist, the server is not in standby mode */
if (stat(recovery_file, &statbuf) != 0)
{
write_stderr(_("%s: cannot promote server; "
"server is not in standby mode\n"),
progname);
exit(1);
}
if ((prmfile = fopen(promote_file, "w")) == NULL)
{
write_stderr(_("%s: could not create promote signal file \"%s\": %s\n"),
progname, promote_file, strerror(errno));
exit(1);
}
if (fclose(prmfile))
{
write_stderr(_("%s: could not write promote signal file \"%s\": %s\n"),
progname, promote_file, strerror(errno));
exit(1);
}
sig = SIGUSR1;
if (kill((pid_t) pid, sig) != 0)
{
write_stderr(_("%s: could not send promote signal (PID: %ld): %s\n"),
progname, pid, strerror(errno));
if (unlink(promote_file) != 0)
write_stderr(_("%s: could not remove promote signal file \"%s\": %s\n"),
progname, promote_file, strerror(errno));
exit(1);
}
print_msg(_("server promoting\n"));
}
static void
do_reload(void)
@ -1617,7 +1680,7 @@ do_advice(void)
static void
do_help(void)
{
printf(_("%s is a utility to start, stop, restart, reload configuration files,\n"
printf(_("%s is a utility to start, stop, restart, promote, reload configuration files,\n"
"report the status of a PostgreSQL server, or signal a PostgreSQL process.\n\n"), progname);
printf(_("Usage:\n"));
printf(_(" %s init[db] [-D DATADIR] [-s] [-o \"OPTIONS\"]\n"), progname);
@ -1625,6 +1688,7 @@ do_help(void)
printf(_(" %s stop [-W] [-t SECS] [-D DATADIR] [-s] [-m SHUTDOWN-MODE]\n"), progname);
printf(_(" %s restart [-w] [-t SECS] [-D DATADIR] [-s] [-m SHUTDOWN-MODE]\n"
" [-o \"OPTIONS\"]\n"), progname);
printf(_(" %s promote [-D DATADIR] [-s]\n"), progname);
printf(_(" %s reload [-D DATADIR] [-s]\n"), progname);
printf(_(" %s status [-D DATADIR]\n"), progname);
printf(_(" %s kill SIGNALNAME PID\n"), progname);
@ -1950,6 +2014,8 @@ main(int argc, char **argv)
ctl_command = STOP_COMMAND;
else if (strcmp(argv[optind], "restart") == 0)
ctl_command = RESTART_COMMAND;
else if (strcmp(argv[optind], "promote") == 0)
ctl_command = PROMOTE_COMMAND;
else if (strcmp(argv[optind], "reload") == 0)
ctl_command = RELOAD_COMMAND;
else if (strcmp(argv[optind], "status") == 0)
@ -2036,6 +2102,7 @@ main(int argc, char **argv)
snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pg_data);
snprintf(backup_file, MAXPGPATH, "%s/backup_label", pg_data);
snprintf(recovery_file, MAXPGPATH, "%s/recovery.conf", pg_data);
snprintf(promote_file, MAXPGPATH, "%s/promote", pg_data);
}
switch (ctl_command)
@ -2055,6 +2122,9 @@ main(int argc, char **argv)
case RESTART_COMMAND:
do_restart();
break;
case PROMOTE_COMMAND:
do_promote();
break;
case RELOAD_COMMAND:
do_reload();
break;

View File

@ -313,6 +313,7 @@ extern TimeLineID GetRecoveryTargetTLI(void);
extern void HandleStartupProcInterrupts(void);
extern void StartupProcessMain(void);
extern bool CheckPromoteSignal(void);
extern void WakeupRecovery(void);
/*