diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml index a89296905b..368c68867a 100644 --- a/doc/src/sgml/high-availability.sgml +++ b/doc/src/sgml/high-availability.sgml @@ -615,8 +615,9 @@ protocol to make nodes agree on a serializable transactional order. - Standby mode is exited and the server switches to normal operation, - when a trigger file is found (trigger_file). Before failover, + Standby mode is exited and the server switches to normal operation + when pg_ctl promote is run or a trigger file is found + (trigger_file). Before failover, any WAL immediately available in the archive or in pg_xlog will be restored, but no attempt is made to connect to the master. @@ -685,11 +686,7 @@ protocol to make nodes agree on a serializable transactional order. If you're setting up the standby server for high availability purposes, set up WAL archiving, connections and authentication like the primary server, because the standby server will work as a primary server after - failover. You will also need to set trigger_file to make - it possible to fail over. - If you're setting up the standby server for reporting - purposes, with no plans to fail over to it, trigger_file - is not required. + failover. @@ -710,7 +707,6 @@ protocol to make nodes agree on a serializable transactional order. standby_mode = 'on' primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass' restore_command = 'cp /path/to/archive/%f %p' -trigger_file = '/path/to/trigger_file' archive_cleanup_command = 'pg_archivecleanup /path/to/archive %r' @@ -949,13 +945,15 @@ primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass' - To trigger failover of a log-shipping standby server, create a trigger + To trigger failover of a log-shipping standby server, + run pg_ctl promote or create a trigger file with the filename and path specified by the trigger_file - setting in recovery.conf. 
If trigger_file is - not given, there is no way to exit recovery in the standby and promote - it to a master. That can be useful for e.g reporting servers that are + setting in recovery.conf. If you're planning to use + pg_ctl promote to fail over, trigger_file is + not required. If you're setting up reporting servers that are only used to offload read-only queries from the primary, not for high - availability purposes. + availability purposes, you don't need to exit recovery in the standby + or promote it to a master. diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml index 0ccd65e3ee..602fbe2c76 100644 --- a/doc/src/sgml/recovery-config.sgml +++ b/doc/src/sgml/recovery-config.sgml @@ -343,8 +343,8 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows Specifies a trigger file whose presence ends recovery in the - standby. If no trigger file is specified, the standby never exits - recovery. + standby. Even if this value is not set, you can still promote + the standby using pg_ctl promote. This setting has no effect if standby_mode is off. diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml index 28f415da24..307f66b8da 100644 --- a/doc/src/sgml/ref/pg_ctl-ref.sgml +++ b/doc/src/sgml/ref/pg_ctl-ref.sgml @@ -75,6 +75,13 @@ PostgreSQL documentation -o options + + pg_ctl + promote + -s + -D datadir + + pg_ctl reload @@ -183,6 +190,12 @@ PostgreSQL documentation command-line options. + + In <option>promote</option> mode, the standby server that is + running in the specified data directory is commanded to exit + recovery and begin read-write operations.
+ + mode simply sends the postgres process a SIGHUP diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 4dc8dc6e39..6fdaaff914 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -62,6 +62,7 @@ /* File path names (all relative to $PGDATA) */ #define RECOVERY_COMMAND_FILE "recovery.conf" #define RECOVERY_COMMAND_DONE "recovery.done" +#define PROMOTE_SIGNAL_FILE "promote" /* User-settable parameters */ @@ -565,6 +566,7 @@ typedef struct xl_restore_point */ static volatile sig_atomic_t got_SIGHUP = false; static volatile sig_atomic_t shutdown_requested = false; +static volatile sig_atomic_t promote_triggered = false; /* * Flag set when executing a restore command, to tell SIGTERM signal handler @@ -9669,6 +9671,14 @@ StartupProcSigUsr1Handler(SIGNAL_ARGS) latch_sigusr1_handler(); } +/* SIGUSR2: set flag to finish recovery */ +static void +StartupProcTriggerHandler(SIGNAL_ARGS) +{ + promote_triggered = true; + WakeupRecovery(); +} + /* SIGHUP: set flag to re-read config file at next convenient time */ static void StartupProcSigHupHandler(SIGNAL_ARGS) @@ -9746,7 +9756,7 @@ StartupProcessMain(void) pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, StartupProcSigUsr1Handler); - pqsignal(SIGUSR2, SIG_IGN); + pqsignal(SIGUSR2, StartupProcTriggerHandler); /* * Reset some signals that are accepted by postmaster but not here @@ -10192,9 +10202,9 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr) } /* - * Check to see if the trigger file exists. If it does, request postmaster - * to shut down walreceiver, wait for it to exit, remove the trigger - * file, and return true. + * Check to see whether the user-specified trigger file exists and whether a + * promote request has arrived. If either condition holds, request postmaster + * to shut down walreceiver, wait for it to exit, and return true. 
*/ static bool CheckForStandbyTrigger(void) @@ -10205,6 +10215,16 @@ CheckForStandbyTrigger(void) if (triggered) return true; + if (promote_triggered) + { + ereport(LOG, + (errmsg("received promote request"))); + ShutdownWalRcv(); + promote_triggered = false; + triggered = true; + return true; + } + if (TriggerFile == NULL) return false; @@ -10220,6 +10240,27 @@ CheckForStandbyTrigger(void) return false; } +/* + * Check to see if a promote request has arrived. Should be + * called by postmaster after receiving SIGUSR1. + */ +bool +CheckPromoteSignal(void) +{ + struct stat stat_buf; + + if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0) + { + /* + * Since we are in a signal handler, it's not safe + * to elog. We silently ignore any error from unlink. + */ + unlink(PROMOTE_SIGNAL_FILE); + return true; + } + return false; +} + /* * Wake up startup process to replay newly arrived WAL, or to notice that * failover has been requested. diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 8f77d1bfc9..997af5bf07 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -4284,6 +4284,14 @@ sigusr1_handler(SIGNAL_ARGS) WalReceiverPID = StartWalReceiver(); } + if (CheckPromoteSignal() && StartupPID != 0 && + (pmState == PM_STARTUP || pmState == PM_RECOVERY || + pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY)) + { + /* Tell startup process to finish recovery */ + signal_child(StartupPID, SIGUSR2); + } + PG_SETMASK(&UnBlockSig); errno = save_errno; diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index 6c87f158f3..2fab5c98fd 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -62,6 +62,7 @@ typedef enum START_COMMAND, STOP_COMMAND, RESTART_COMMAND, + PROMOTE_COMMAND, RELOAD_COMMAND, STATUS_COMMAND, KILL_COMMAND, @@ -96,6 +97,7 @@ static char postopts_file[MAXPGPATH]; static char pid_file[MAXPGPATH]; static char backup_file[MAXPGPATH]; static char 
recovery_file[MAXPGPATH]; +static char promote_file[MAXPGPATH]; #if defined(WIN32) || defined(__CYGWIN__) static DWORD pgctl_start_type = SERVICE_AUTO_START; @@ -124,6 +126,7 @@ static void do_init(void); static void do_start(void); static void do_stop(void); static void do_restart(void); +static void do_promote(void); static void do_reload(void); static void do_status(void); static void do_kill(pgpid_t pid); @@ -872,7 +875,7 @@ do_stop(void) /* - * restart/reload routines + * restart/promote/reload routines */ static void @@ -965,6 +968,66 @@ do_restart(void) do_start(); } +static void +do_promote(void) +{ + FILE *prmfile; + pgpid_t pid; + struct stat statbuf; + + pid = get_pgpid(); + + if (pid == 0) /* no pid file */ + { + write_stderr(_("%s: PID file \"%s\" does not exist\n"), progname, pid_file); + write_stderr(_("Is server running?\n")); + exit(1); + } + else if (pid < 0) /* standalone backend, not postmaster */ + { + pid = -pid; + write_stderr(_("%s: cannot promote server; " + "single-user server is running (PID: %ld)\n"), + progname, pid); + exit(1); + } + + /* If recovery.conf doesn't exist, the server is not in standby mode */ + if (stat(recovery_file, &statbuf) != 0) + { + write_stderr(_("%s: cannot promote server; " + "server is not in standby mode\n"), + progname); + exit(1); + } + + if ((prmfile = fopen(promote_file, "w")) == NULL) + { + write_stderr(_("%s: could not create promote signal file \"%s\": %s\n"), + progname, promote_file, strerror(errno)); + exit(1); + } + if (fclose(prmfile)) + { + write_stderr(_("%s: could not write promote signal file \"%s\": %s\n"), + progname, promote_file, strerror(errno)); + exit(1); + } + + sig = SIGUSR1; + if (kill((pid_t) pid, sig) != 0) + { + write_stderr(_("%s: could not send promote signal (PID: %ld): %s\n"), + progname, pid, strerror(errno)); + if (unlink(promote_file) != 0) + write_stderr(_("%s: could not remove promote signal file \"%s\": %s\n"), + progname, promote_file, strerror(errno)); + exit(1); + } + 
+ print_msg(_("server promoting\n")); +} + static void do_reload(void) @@ -1617,7 +1680,7 @@ do_advice(void) static void do_help(void) { - printf(_("%s is a utility to start, stop, restart, reload configuration files,\n" + printf(_("%s is a utility to start, stop, restart, promote, reload configuration files,\n" "report the status of a PostgreSQL server, or signal a PostgreSQL process.\n\n"), progname); printf(_("Usage:\n")); printf(_(" %s init[db] [-D DATADIR] [-s] [-o \"OPTIONS\"]\n"), progname); @@ -1625,6 +1688,7 @@ do_help(void) printf(_(" %s stop [-W] [-t SECS] [-D DATADIR] [-s] [-m SHUTDOWN-MODE]\n"), progname); printf(_(" %s restart [-w] [-t SECS] [-D DATADIR] [-s] [-m SHUTDOWN-MODE]\n" " [-o \"OPTIONS\"]\n"), progname); + printf(_(" %s promote [-D DATADIR] [-s]\n"), progname); printf(_(" %s reload [-D DATADIR] [-s]\n"), progname); printf(_(" %s status [-D DATADIR]\n"), progname); printf(_(" %s kill SIGNALNAME PID\n"), progname); @@ -1950,6 +2014,8 @@ main(int argc, char **argv) ctl_command = STOP_COMMAND; else if (strcmp(argv[optind], "restart") == 0) ctl_command = RESTART_COMMAND; + else if (strcmp(argv[optind], "promote") == 0) + ctl_command = PROMOTE_COMMAND; else if (strcmp(argv[optind], "reload") == 0) ctl_command = RELOAD_COMMAND; else if (strcmp(argv[optind], "status") == 0) @@ -2036,6 +2102,7 @@ main(int argc, char **argv) snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pg_data); snprintf(backup_file, MAXPGPATH, "%s/backup_label", pg_data); snprintf(recovery_file, MAXPGPATH, "%s/recovery.conf", pg_data); + snprintf(promote_file, MAXPGPATH, "%s/promote", pg_data); } switch (ctl_command) @@ -2055,6 +2122,9 @@ main(int argc, char **argv) case RESTART_COMMAND: do_restart(); break; + case PROMOTE_COMMAND: + do_promote(); + break; case RELOAD_COMMAND: do_reload(); break; diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 1803d5ab20..7cd07a25d2 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -313,6 +313,7 
@@ extern TimeLineID GetRecoveryTargetTLI(void); extern void HandleStartupProcInterrupts(void); extern void StartupProcessMain(void); +extern bool CheckPromoteSignal(void); extern void WakeupRecovery(void); /*