Change the signaling of end-of-recovery. Startup process now indicates end
of recovery by exiting with exit code 0, like in previous releases. Per
Tom's suggestion.
This commit is contained in:
Heikki Linnakangas 2009-02-23 09:28:50 +00:00
parent 451a15f95c
commit bc134d7a51
4 changed files with 116 additions and 205 deletions

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.331 2009/02/18 15:58:40 heikki Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.332 2009/02/23 09:28:49 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -432,7 +432,7 @@ static bool InRedo = false;
static volatile sig_atomic_t shutdown_requested = false; static volatile sig_atomic_t shutdown_requested = false;
/* /*
* Flag set when executing a restore command, to tell SIGTERM signal handler * Flag set when executing a restore command, to tell SIGTERM signal handler
* that it's safe to just proc_exit(0). * that it's safe to just proc_exit.
*/ */
static volatile sig_atomic_t in_restore_command = false; static volatile sig_atomic_t in_restore_command = false;
@ -2752,7 +2752,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
*/ */
in_restore_command = true; in_restore_command = true;
if (shutdown_requested) if (shutdown_requested)
proc_exit(0); proc_exit(1);
/* /*
* Copy xlog from archival storage to XLOGDIR * Copy xlog from archival storage to XLOGDIR
@ -2818,7 +2818,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
* On SIGTERM, assume we have received a fast shutdown request, and exit * On SIGTERM, assume we have received a fast shutdown request, and exit
* cleanly. It's pure chance whether we receive the SIGTERM first, or the * cleanly. It's pure chance whether we receive the SIGTERM first, or the
* child process. If we receive it first, the signal handler will call * child process. If we receive it first, the signal handler will call
* proc_exit(0), otherwise we do it here. If we or the child process * proc_exit, otherwise we do it here. If we or the child process
* received SIGTERM for any other reason than a fast shutdown request, * received SIGTERM for any other reason than a fast shutdown request,
* postmaster will perform an immediate shutdown when it sees us exiting * postmaster will perform an immediate shutdown when it sees us exiting
* unexpectedly. * unexpectedly.
@ -2829,7 +2829,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
* too. * too.
*/ */
if (WTERMSIG(rc) == SIGTERM) if (WTERMSIG(rc) == SIGTERM)
proc_exit(0); proc_exit(1);
signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
@ -5367,7 +5367,7 @@ StartupXLOG(void)
* recovery. * recovery.
*/ */
if (shutdown_requested) if (shutdown_requested)
proc_exit(0); proc_exit(1);
/* /*
* Have we reached our safe starting point? If so, we can * Have we reached our safe starting point? If so, we can
@ -7646,7 +7646,7 @@ static void
StartupProcShutdownHandler(SIGNAL_ARGS) StartupProcShutdownHandler(SIGNAL_ARGS)
{ {
if (in_restore_command) if (in_restore_command)
proc_exit(0); proc_exit(1);
else else
shutdown_requested = true; shutdown_requested = true;
} }
@ -7694,9 +7694,9 @@ StartupProcessMain(void)
BuildFlatFiles(false); BuildFlatFiles(false);
/* Let postmaster know that startup is finished */ /*
SendPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED); * Exit normally. Exit code 0 tells postmaster that we completed
* recovery successfully.
/* exit normally */ */
proc_exit(0); proc_exit(0);
} }

View File

@ -37,7 +37,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.572 2009/02/19 16:43:13 heikki Exp $ * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.573 2009/02/23 09:28:50 heikki Exp $
* *
* NOTES * NOTES
* *
@ -225,15 +225,7 @@ static pid_t StartupPID = 0,
static int Shutdown = NoShutdown; static int Shutdown = NoShutdown;
static bool FatalError = false; /* T if recovering from backend crash */ static bool FatalError = false; /* T if recovering from backend crash */
static bool RecoveryError = false; /* T if recovery failed */ static bool RecoveryError = false; /* T if WAL recovery failed */
/* State of WAL redo */
#define NoRecovery 0
#define RecoveryStarted 1
#define RecoveryConsistent 2
#define RecoveryCompleted 3
static int RecoveryStatus = NoRecovery;
/* /*
* We use a simple state machine to control startup, shutdown, and * We use a simple state machine to control startup, shutdown, and
@ -252,8 +244,8 @@ static int RecoveryStatus = NoRecovery;
* could start accepting connections to perform read-only queries at this * could start accepting connections to perform read-only queries at this
* point, if we had the infrastructure to do that. * point, if we had the infrastructure to do that.
* *
* When the WAL redo is finished, the startup process signals us the third * When WAL redo is finished, the startup process exits with exit code 0
* time, and we switch to PM_RUN state. The startup process can also skip the * and we switch to PM_RUN state. Startup process can also skip the
* recovery and consistent recovery phases altogether, as it will during * recovery and consistent recovery phases altogether, as it will during
* normal startup when there's no recovery to be done, for example. * normal startup when there's no recovery to be done, for example.
* *
@ -338,7 +330,6 @@ static void pmdie(SIGNAL_ARGS);
static void reaper(SIGNAL_ARGS); static void reaper(SIGNAL_ARGS);
static void sigusr1_handler(SIGNAL_ARGS); static void sigusr1_handler(SIGNAL_ARGS);
static void dummy_handler(SIGNAL_ARGS); static void dummy_handler(SIGNAL_ARGS);
static void CheckRecoverySignals(void);
static void CleanupBackend(int pid, int exitstatus); static void CleanupBackend(int pid, int exitstatus);
static void HandleChildCrash(int pid, int exitstatus, const char *procname); static void HandleChildCrash(int pid, int exitstatus, const char *procname);
static void LogChildExit(int lev, const char *procname, static void LogChildExit(int lev, const char *procname,
@ -2019,7 +2010,8 @@ pmdie(SIGNAL_ARGS)
ereport(LOG, ereport(LOG,
(errmsg("received smart shutdown request"))); (errmsg("received smart shutdown request")));
if (pmState == PM_RUN || pmState == PM_RECOVERY || pmState == PM_RECOVERY_CONSISTENT) if (pmState == PM_RUN || pmState == PM_RECOVERY ||
pmState == PM_RECOVERY_CONSISTENT)
{ {
/* autovacuum workers are told to shut down immediately */ /* autovacuum workers are told to shut down immediately */
SignalAutovacWorkers(SIGTERM); SignalAutovacWorkers(SIGTERM);
@ -2161,21 +2153,12 @@ reaper(SIGNAL_ARGS)
{ {
StartupPID = 0; StartupPID = 0;
/*
* Check if we've received a signal from the startup process
* first. This can change pmState. If the startup process sends
* a signal and exits immediately after that, we might not have
* processed the signal yet. We need to know if it completed
* recovery before it exited.
*/
CheckRecoverySignals();
/* /*
* Unexpected exit of startup process (including FATAL exit) * Unexpected exit of startup process (including FATAL exit)
* during PM_STARTUP is treated as catastrophic. There is no * during PM_STARTUP is treated as catastrophic. There is no
* other processes running yet. * other processes running yet, so we can just exit.
*/ */
if (pmState == PM_STARTUP) if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
{ {
LogChildExit(LOG, _("startup process"), LogChildExit(LOG, _("startup process"),
pid, exitstatus); pid, exitstatus);
@ -2183,6 +2166,17 @@ reaper(SIGNAL_ARGS)
(errmsg("aborting startup due to startup process failure"))); (errmsg("aborting startup due to startup process failure")));
ExitPostmaster(1); ExitPostmaster(1);
} }
/*
* Startup process exited in response to a shutdown request (or
* it completed normally regardless of the shutdown request).
*/
if (Shutdown > NoShutdown &&
(EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
{
pmState = PM_WAIT_BACKENDS;
/* PostmasterStateMachine logic does the rest */
continue;
}
/* /*
* Any unexpected exit (including FATAL exit) of the startup * Any unexpected exit (including FATAL exit) of the startup
* process is treated as a crash, except that we don't want * process is treated as a crash, except that we don't want
@ -2195,18 +2189,44 @@ reaper(SIGNAL_ARGS)
_("startup process")); _("startup process"));
continue; continue;
} }
/* /*
* Startup process exited normally, but didn't finish recovery. * Startup succeeded, commence normal operations
* This can happen if someone else than postmaster kills the
* startup process with SIGTERM. Treat it like a crash.
*/ */
if (pmState == PM_RECOVERY || pmState == PM_RECOVERY_CONSISTENT) FatalError = false;
{ pmState = PM_RUN;
RecoveryError = true;
HandleChildCrash(pid, exitstatus, /*
_("startup process")); * Load the flat authorization file into postmaster's cache. The
continue; * startup process has recomputed this from the database contents,
} * so we wait till it finishes before loading it.
*/
load_role();
/*
* Crank up the background writer, if we didn't do that already
* when we entered consistent recovery phase. It doesn't matter
* if this fails, we'll just try again later.
*/
if (BgWriterPID == 0)
BgWriterPID = StartBackgroundWriter();
/*
* Likewise, start other special children as needed. In a restart
* situation, some of them may be alive already.
*/
if (WalWriterPID == 0)
WalWriterPID = StartWalWriter();
if (AutoVacuumingActive() && AutoVacPID == 0)
AutoVacPID = StartAutoVacLauncher();
if (XLogArchivingActive() && PgArchPID == 0)
PgArchPID = pgarch_start();
if (PgStatPID == 0)
PgStatPID = pgstat_start();
/* at this point we are really open for business */
ereport(LOG,
(errmsg("database system is ready to accept connections")));
} }
/* /*
@ -2622,124 +2642,6 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
static void static void
PostmasterStateMachine(void) PostmasterStateMachine(void)
{ {
/* Startup states */
if (pmState == PM_STARTUP && RecoveryStatus > NoRecovery)
{
/* WAL redo has started. We're out of reinitialization. */
FatalError = false;
/*
* Go to shutdown mode if a shutdown request was pending.
*/
if (Shutdown > NoShutdown)
{
pmState = PM_WAIT_BACKENDS;
/* PostmasterStateMachine logic does the rest */
}
else
{
/*
* Crank up the background writer. It doesn't matter if this
* fails, we'll just try again later.
*/
Assert(BgWriterPID == 0);
BgWriterPID = StartBackgroundWriter();
pmState = PM_RECOVERY;
}
}
if (pmState == PM_RECOVERY && RecoveryStatus >= RecoveryConsistent)
{
/*
* Recovery has reached a consistent recovery point. Go to shutdown
* mode if a shutdown request was pending.
*/
if (Shutdown > NoShutdown)
{
pmState = PM_WAIT_BACKENDS;
/* PostmasterStateMachine logic does the rest */
}
else
{
pmState = PM_RECOVERY_CONSISTENT;
/*
* Load the flat authorization file into postmaster's cache. The
* startup process won't have recomputed this from the database yet,
* so we it may change following recovery.
*/
load_role();
/*
* Likewise, start other special children as needed.
*/
Assert(PgStatPID == 0);
PgStatPID = pgstat_start();
/* XXX at this point we could accept read-only connections */
ereport(DEBUG1,
(errmsg("database system is in consistent recovery mode")));
}
}
if ((pmState == PM_RECOVERY ||
pmState == PM_RECOVERY_CONSISTENT ||
pmState == PM_STARTUP) &&
RecoveryStatus == RecoveryCompleted)
{
/*
* Startup succeeded.
*
* Go to shutdown mode if a shutdown request was pending.
*/
if (Shutdown > NoShutdown)
{
pmState = PM_WAIT_BACKENDS;
/* PostmasterStateMachine logic does the rest */
}
else
{
/*
* Otherwise, commence normal operations.
*/
pmState = PM_RUN;
/*
* Load the flat authorization file into postmaster's cache. The
* startup process has recomputed this from the database contents,
* so we wait till it finishes before loading it.
*/
load_role();
/*
* Crank up the background writer, if we didn't do that already
* when we entered consistent recovery phase. It doesn't matter
* if this fails, we'll just try again later.
*/
if (BgWriterPID == 0)
BgWriterPID = StartBackgroundWriter();
/*
* Likewise, start other special children as needed. In a restart
* situation, some of them may be alive already.
*/
if (WalWriterPID == 0)
WalWriterPID = StartWalWriter();
if (AutoVacuumingActive() && AutoVacPID == 0)
AutoVacPID = StartAutoVacLauncher();
if (XLogArchivingActive() && PgArchPID == 0)
PgArchPID = pgarch_start();
if (PgStatPID == 0)
PgStatPID = pgstat_start();
/* at this point we are really open for business */
ereport(LOG,
(errmsg("database system is ready to accept connections")));
}
}
/* Shutdown states */
if (pmState == PM_WAIT_BACKUP) if (pmState == PM_WAIT_BACKUP)
{ {
/* /*
@ -2901,8 +2803,6 @@ PostmasterStateMachine(void)
shmem_exit(1); shmem_exit(1);
reset_shared(PostPortNumber); reset_shared(PostPortNumber);
RecoveryStatus = NoRecovery;
StartupPID = StartupDataBase(); StartupPID = StartupDataBase();
Assert(StartupPID != 0); Assert(StartupPID != 0);
pmState = PM_STARTUP; pmState = PM_STARTUP;
@ -4006,37 +3906,6 @@ ExitPostmaster(int status)
proc_exit(status); proc_exit(status);
} }
/*
* common code used in sigusr1_handler() and reaper() to handle
* recovery-related signals from startup process
*/
static void
CheckRecoverySignals(void)
{
bool changed = false;
if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED))
{
Assert(pmState == PM_STARTUP);
RecoveryStatus = RecoveryStarted;
changed = true;
}
if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT))
{
RecoveryStatus = RecoveryConsistent;
changed = true;
}
if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED))
{
RecoveryStatus = RecoveryCompleted;
changed = true;
}
if (changed)
PostmasterStateMachine();
}
/* /*
* sigusr1_handler - handle signal conditions from child processes * sigusr1_handler - handle signal conditions from child processes
*/ */
@ -4047,7 +3916,49 @@ sigusr1_handler(SIGNAL_ARGS)
PG_SETMASK(&BlockSig); PG_SETMASK(&BlockSig);
CheckRecoverySignals(); /*
* RECOVERY_STARTED and RECOVERY_CONSISTENT signals are ignored in
* unexpected states. If the startup process quickly starts up, completes
* recovery, exits, we might process the death of the startup process
* first. We don't want to go back to recovery in that case.
*/
if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
pmState == PM_STARTUP)
{
/* WAL redo has started. We're out of reinitialization. */
FatalError = false;
/*
* Crank up the background writer. It doesn't matter if this
* fails, we'll just try again later.
*/
Assert(BgWriterPID == 0);
BgWriterPID = StartBackgroundWriter();
pmState = PM_RECOVERY;
}
if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT) &&
pmState == PM_RECOVERY)
{
/*
* Load the flat authorization file into postmaster's cache. The
* startup process won't have recomputed this from the database yet,
* so we it may change following recovery.
*/
load_role();
/*
* Likewise, start other special children as needed.
*/
Assert(PgStatPID == 0);
PgStatPID = pgstat_start();
/* XXX at this point we could accept read-only connections */
ereport(DEBUG1,
(errmsg("database system is in consistent recovery mode")));
pmState = PM_RECOVERY_CONSISTENT;
}
if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE)) if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
{ {

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/pmsignal.h,v 1.22 2009/02/18 15:58:41 heikki Exp $ * $PostgreSQL: pgsql/src/include/storage/pmsignal.h,v 1.23 2009/02/23 09:28:50 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -24,7 +24,6 @@ typedef enum
{ {
PMSIGNAL_RECOVERY_STARTED, /* recovery has started */ PMSIGNAL_RECOVERY_STARTED, /* recovery has started */
PMSIGNAL_RECOVERY_CONSISTENT, /* recovery has reached consistent state */ PMSIGNAL_RECOVERY_CONSISTENT, /* recovery has reached consistent state */
PMSIGNAL_RECOVERY_COMPLETED, /* recovery has completed */
PMSIGNAL_PASSWORD_CHANGE, /* pg_auth file has changed */ PMSIGNAL_PASSWORD_CHANGE, /* pg_auth file has changed */
PMSIGNAL_WAKEN_ARCHIVER, /* send a NOTIFY signal to xlog archiver */ PMSIGNAL_WAKEN_ARCHIVER, /* send a NOTIFY signal to xlog archiver */
PMSIGNAL_ROTATE_LOGFILE, /* send SIGUSR1 to syslogger to rotate logfile */ PMSIGNAL_ROTATE_LOGFILE, /* send SIGUSR1 to syslogger to rotate logfile */

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.111 2009/02/19 08:02:32 heikki Exp $ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.112 2009/02/23 09:28:50 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -138,12 +138,13 @@ typedef struct PROC_HDR
/* /*
* We set aside some extra PGPROC structures for auxiliary processes, * We set aside some extra PGPROC structures for auxiliary processes,
* ie things that aren't full-fledged backends but need shmem access. * ie things that aren't full-fledged backends but need shmem access.
* *
* Background writer, WAL writer, and autovacuum launcher run during * Background writer, WAL writer, and autovacuum launcher run during
* normal operation. When recovery has just finished, the startup * normal operation. Startup process also consumes one slot, but WAL
* process can co-exist with them for a brief period before it exits. * writer and autovacuum launcher are launched only after it has
* exited.
*/ */
#define NUM_AUXILIARY_PROCS 4 #define NUM_AUXILIARY_PROCS 3
/* configurable options */ /* configurable options */