mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-06 15:24:56 +08:00
Send SIGKILL to children if they don't die quickly in immediate shutdown
On immediate shutdown, or during a restart-after-crash sequence, postmaster used to send SIGQUIT (and then abandon ship if shutdown); but this is not a good strategy if backends don't die because of that signal. (This might happen, for example, if a backend gets tangled trying to malloc() due to gettext(), as in an example illustrated by MauMau.) This causes problems when later trying to restart the server, because some processes are still attached to the shared memory segment.

Instead of just abandoning such backends to their fates, we now have postmaster hang around for a little while longer, send a SIGKILL after some reasonable waiting period, and then exit. This makes immediate shutdown more reliable.

There is disagreement on whether it's best for postmaster to exit after sending SIGKILL, or to stick around until all children have reported death. If this controversy is resolved differently than what this patch implements, it's an easy change to make.

Bug reported by MauMau in message 20DAEA8949EC4E2289C6E8E58560DEC0@maumau.

Authors: MauMau and Álvaro Herrera
This commit is contained in:
parent
457d6cf049
commit
82233ce7ea
@ -1362,11 +1362,11 @@ echo -1000 > /proc/self/oom_score_adj
|
|||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
This is the <firstterm>Immediate Shutdown</firstterm> mode.
|
This is the <firstterm>Immediate Shutdown</firstterm> mode.
|
||||||
The master <command>postgres</command> process will send a
|
The server will send <systemitem>SIGQUIT</systemitem> to all child
|
||||||
<systemitem>SIGQUIT</systemitem> to all child processes and exit
|
processes and wait for them to terminate. Those that don't terminate
|
||||||
immediately, without properly shutting itself down. The child processes
|
within 5 seconds will be sent <systemitem>SIGKILL</systemitem> by the
|
||||||
likewise exit immediately upon receiving
|
master <command>postgres</command> process, which will then terminate
|
||||||
<systemitem>SIGQUIT</systemitem>. This will lead to recovery (by
|
without further waiting. This will lead to recovery (by
|
||||||
replaying the WAL log) upon next start-up. This is recommended
|
replaying the WAL log) upon next start-up. This is recommended
|
||||||
only in emergencies.
|
only in emergencies.
|
||||||
</para>
|
</para>
|
||||||
|
@ -275,6 +275,7 @@ static pid_t StartupPID = 0,
|
|||||||
#define NoShutdown 0
|
#define NoShutdown 0
|
||||||
#define SmartShutdown 1
|
#define SmartShutdown 1
|
||||||
#define FastShutdown 2
|
#define FastShutdown 2
|
||||||
|
#define ImmediateShutdown 3
|
||||||
|
|
||||||
static int Shutdown = NoShutdown;
|
static int Shutdown = NoShutdown;
|
||||||
|
|
||||||
@ -345,6 +346,10 @@ typedef enum
|
|||||||
|
|
||||||
static PMState pmState = PM_INIT;
|
static PMState pmState = PM_INIT;
|
||||||
|
|
||||||
|
/* Start time of abort processing at immediate shutdown or child crash */
|
||||||
|
static time_t AbortStartTime;
|
||||||
|
#define SIGKILL_CHILDREN_AFTER_SECS 5
|
||||||
|
|
||||||
static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
|
static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
|
||||||
|
|
||||||
bool ClientAuthInProgress = false; /* T during new-client
|
bool ClientAuthInProgress = false; /* T during new-client
|
||||||
@ -421,6 +426,7 @@ static void RandomSalt(char *md5Salt);
|
|||||||
static void signal_child(pid_t pid, int signal);
|
static void signal_child(pid_t pid, int signal);
|
||||||
static bool SignalSomeChildren(int signal, int targets);
|
static bool SignalSomeChildren(int signal, int targets);
|
||||||
static bool SignalUnconnectedWorkers(int signal);
|
static bool SignalUnconnectedWorkers(int signal);
|
||||||
|
static void TerminateChildren(int signal);
|
||||||
|
|
||||||
#define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
|
#define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
|
||||||
|
|
||||||
@ -1427,8 +1433,18 @@ DetermineSleepTime(struct timeval * timeout)
|
|||||||
if (Shutdown > NoShutdown ||
|
if (Shutdown > NoShutdown ||
|
||||||
(!StartWorkerNeeded && !HaveCrashedWorker))
|
(!StartWorkerNeeded && !HaveCrashedWorker))
|
||||||
{
|
{
|
||||||
timeout->tv_sec = 60;
|
if (AbortStartTime > 0)
|
||||||
timeout->tv_usec = 0;
|
{
|
||||||
|
/* remaining time until SIGKILL is due, but never less than 1 second */
|
||||||
|
timeout->tv_sec = Max(SIGKILL_CHILDREN_AFTER_SECS -
|
||||||
|
(time(NULL) - AbortStartTime), 1);
|
||||||
|
timeout->tv_usec = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
timeout->tv_sec = 60;
|
||||||
|
timeout->tv_usec = 0;
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1660,6 +1676,28 @@ ServerLoop(void)
|
|||||||
TouchSocketLockFiles();
|
TouchSocketLockFiles();
|
||||||
last_touch_time = now;
|
last_touch_time = now;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we already sent SIGQUIT to children and they are slow to shut
|
||||||
|
* down, it's time to send them SIGKILL. This doesn't happen normally,
|
||||||
|
* but under certain conditions backends can get stuck while shutting
|
||||||
|
* down. This is a last measure to get them unwedged.
|
||||||
|
*
|
||||||
|
* Note we also do this during recovery from a process crash.
|
||||||
|
*/
|
||||||
|
if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
|
||||||
|
now - AbortStartTime >= SIGKILL_CHILDREN_AFTER_SECS)
|
||||||
|
{
|
||||||
|
/* We were gentle with them before. Not anymore */
|
||||||
|
TerminateChildren(SIGKILL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Additionally, unless we're recovering from a process crash, it's
|
||||||
|
* now the time for postmaster to abandon ship.
|
||||||
|
*/
|
||||||
|
if (!FatalError)
|
||||||
|
ExitPostmaster(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2455,30 +2493,27 @@ pmdie(SIGNAL_ARGS)
|
|||||||
/*
|
/*
|
||||||
* Immediate Shutdown:
|
* Immediate Shutdown:
|
||||||
*
|
*
|
||||||
* abort all children with SIGQUIT and exit without attempt to
|
* abort all children with SIGQUIT, wait for them to exit,
|
||||||
* properly shut down data base system.
|
* terminate remaining ones with SIGKILL, then exit without
|
||||||
|
* attempt to properly shut down the data base system.
|
||||||
*/
|
*/
|
||||||
|
if (Shutdown >= ImmediateShutdown)
|
||||||
|
break;
|
||||||
|
Shutdown = ImmediateShutdown;
|
||||||
ereport(LOG,
|
ereport(LOG,
|
||||||
(errmsg("received immediate shutdown request")));
|
(errmsg("received immediate shutdown request")));
|
||||||
SignalChildren(SIGQUIT);
|
|
||||||
if (StartupPID != 0)
|
TerminateChildren(SIGQUIT);
|
||||||
signal_child(StartupPID, SIGQUIT);
|
pmState = PM_WAIT_BACKENDS;
|
||||||
if (BgWriterPID != 0)
|
|
||||||
signal_child(BgWriterPID, SIGQUIT);
|
/* set stopwatch for them to die */
|
||||||
if (CheckpointerPID != 0)
|
AbortStartTime = time(NULL);
|
||||||
signal_child(CheckpointerPID, SIGQUIT);
|
|
||||||
if (WalWriterPID != 0)
|
/*
|
||||||
signal_child(WalWriterPID, SIGQUIT);
|
* Now wait for backends to exit. If there are none,
|
||||||
if (WalReceiverPID != 0)
|
* PostmasterStateMachine will take the next step.
|
||||||
signal_child(WalReceiverPID, SIGQUIT);
|
*/
|
||||||
if (AutoVacPID != 0)
|
PostmasterStateMachine();
|
||||||
signal_child(AutoVacPID, SIGQUIT);
|
|
||||||
if (PgArchPID != 0)
|
|
||||||
signal_child(PgArchPID, SIGQUIT);
|
|
||||||
if (PgStatPID != 0)
|
|
||||||
signal_child(PgStatPID, SIGQUIT);
|
|
||||||
SignalUnconnectedWorkers(SIGQUIT);
|
|
||||||
ExitPostmaster(0);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2952,12 +2987,17 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
dlist_mutable_iter iter;
|
dlist_mutable_iter iter;
|
||||||
slist_iter siter;
|
slist_iter siter;
|
||||||
Backend *bp;
|
Backend *bp;
|
||||||
|
bool take_action;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make log entry unless there was a previous crash (if so, nonzero exit
|
* We only log messages and send signals if this is the first process crash
|
||||||
* status is to be expected in SIGQUIT response; don't clutter log)
|
* and we're not doing an immediate shutdown; otherwise, we're only here to
|
||||||
|
* update postmaster's idea of live processes. If we have already signalled
|
||||||
|
* children, nonzero exit status is to be expected, so don't clutter log.
|
||||||
*/
|
*/
|
||||||
if (!FatalError)
|
take_action = !FatalError && Shutdown != ImmediateShutdown;
|
||||||
|
|
||||||
|
if (take_action)
|
||||||
{
|
{
|
||||||
LogChildExit(LOG, procname, pid, exitstatus);
|
LogChildExit(LOG, procname, pid, exitstatus);
|
||||||
ereport(LOG,
|
ereport(LOG,
|
||||||
@ -3003,7 +3043,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
* (-s on command line), then we send SIGSTOP instead, so that we
|
* (-s on command line), then we send SIGSTOP instead, so that we
|
||||||
* can get core dumps from all backends by hand.
|
* can get core dumps from all backends by hand.
|
||||||
*/
|
*/
|
||||||
if (!FatalError)
|
if (take_action)
|
||||||
{
|
{
|
||||||
ereport(DEBUG2,
|
ereport(DEBUG2,
|
||||||
(errmsg_internal("sending %s to process %d",
|
(errmsg_internal("sending %s to process %d",
|
||||||
@ -3055,7 +3095,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
|
if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!FatalError)
|
if (take_action)
|
||||||
{
|
{
|
||||||
ereport(DEBUG2,
|
ereport(DEBUG2,
|
||||||
(errmsg_internal("sending %s to process %d",
|
(errmsg_internal("sending %s to process %d",
|
||||||
@ -3069,7 +3109,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
/* Take care of the startup process too */
|
/* Take care of the startup process too */
|
||||||
if (pid == StartupPID)
|
if (pid == StartupPID)
|
||||||
StartupPID = 0;
|
StartupPID = 0;
|
||||||
else if (StartupPID != 0 && !FatalError)
|
else if (StartupPID != 0 && take_action)
|
||||||
{
|
{
|
||||||
ereport(DEBUG2,
|
ereport(DEBUG2,
|
||||||
(errmsg_internal("sending %s to process %d",
|
(errmsg_internal("sending %s to process %d",
|
||||||
@ -3081,7 +3121,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
/* Take care of the bgwriter too */
|
/* Take care of the bgwriter too */
|
||||||
if (pid == BgWriterPID)
|
if (pid == BgWriterPID)
|
||||||
BgWriterPID = 0;
|
BgWriterPID = 0;
|
||||||
else if (BgWriterPID != 0 && !FatalError)
|
else if (BgWriterPID != 0 && take_action)
|
||||||
{
|
{
|
||||||
ereport(DEBUG2,
|
ereport(DEBUG2,
|
||||||
(errmsg_internal("sending %s to process %d",
|
(errmsg_internal("sending %s to process %d",
|
||||||
@ -3093,7 +3133,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
/* Take care of the checkpointer too */
|
/* Take care of the checkpointer too */
|
||||||
if (pid == CheckpointerPID)
|
if (pid == CheckpointerPID)
|
||||||
CheckpointerPID = 0;
|
CheckpointerPID = 0;
|
||||||
else if (CheckpointerPID != 0 && !FatalError)
|
else if (CheckpointerPID != 0 && take_action)
|
||||||
{
|
{
|
||||||
ereport(DEBUG2,
|
ereport(DEBUG2,
|
||||||
(errmsg_internal("sending %s to process %d",
|
(errmsg_internal("sending %s to process %d",
|
||||||
@ -3105,7 +3145,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
/* Take care of the walwriter too */
|
/* Take care of the walwriter too */
|
||||||
if (pid == WalWriterPID)
|
if (pid == WalWriterPID)
|
||||||
WalWriterPID = 0;
|
WalWriterPID = 0;
|
||||||
else if (WalWriterPID != 0 && !FatalError)
|
else if (WalWriterPID != 0 && take_action)
|
||||||
{
|
{
|
||||||
ereport(DEBUG2,
|
ereport(DEBUG2,
|
||||||
(errmsg_internal("sending %s to process %d",
|
(errmsg_internal("sending %s to process %d",
|
||||||
@ -3117,7 +3157,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
/* Take care of the walreceiver too */
|
/* Take care of the walreceiver too */
|
||||||
if (pid == WalReceiverPID)
|
if (pid == WalReceiverPID)
|
||||||
WalReceiverPID = 0;
|
WalReceiverPID = 0;
|
||||||
else if (WalReceiverPID != 0 && !FatalError)
|
else if (WalReceiverPID != 0 && take_action)
|
||||||
{
|
{
|
||||||
ereport(DEBUG2,
|
ereport(DEBUG2,
|
||||||
(errmsg_internal("sending %s to process %d",
|
(errmsg_internal("sending %s to process %d",
|
||||||
@ -3129,7 +3169,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
/* Take care of the autovacuum launcher too */
|
/* Take care of the autovacuum launcher too */
|
||||||
if (pid == AutoVacPID)
|
if (pid == AutoVacPID)
|
||||||
AutoVacPID = 0;
|
AutoVacPID = 0;
|
||||||
else if (AutoVacPID != 0 && !FatalError)
|
else if (AutoVacPID != 0 && take_action)
|
||||||
{
|
{
|
||||||
ereport(DEBUG2,
|
ereport(DEBUG2,
|
||||||
(errmsg_internal("sending %s to process %d",
|
(errmsg_internal("sending %s to process %d",
|
||||||
@ -3144,7 +3184,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
* simplifies the state-machine logic in the case where a shutdown request
|
* simplifies the state-machine logic in the case where a shutdown request
|
||||||
* arrives during crash processing.)
|
* arrives during crash processing.)
|
||||||
*/
|
*/
|
||||||
if (PgArchPID != 0 && !FatalError)
|
if (PgArchPID != 0 && take_action)
|
||||||
{
|
{
|
||||||
ereport(DEBUG2,
|
ereport(DEBUG2,
|
||||||
(errmsg_internal("sending %s to process %d",
|
(errmsg_internal("sending %s to process %d",
|
||||||
@ -3159,7 +3199,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
* simplifies the state-machine logic in the case where a shutdown request
|
* simplifies the state-machine logic in the case where a shutdown request
|
||||||
* arrives during crash processing.)
|
* arrives during crash processing.)
|
||||||
*/
|
*/
|
||||||
if (PgStatPID != 0 && !FatalError)
|
if (PgStatPID != 0 && take_action)
|
||||||
{
|
{
|
||||||
ereport(DEBUG2,
|
ereport(DEBUG2,
|
||||||
(errmsg_internal("sending %s to process %d",
|
(errmsg_internal("sending %s to process %d",
|
||||||
@ -3171,7 +3211,9 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
|
|
||||||
/* We do NOT restart the syslogger */
|
/* We do NOT restart the syslogger */
|
||||||
|
|
||||||
FatalError = true;
|
if (Shutdown != ImmediateShutdown)
|
||||||
|
FatalError = true;
|
||||||
|
|
||||||
/* We now transit into a state of waiting for children to die */
|
/* We now transit into a state of waiting for children to die */
|
||||||
if (pmState == PM_RECOVERY ||
|
if (pmState == PM_RECOVERY ||
|
||||||
pmState == PM_HOT_STANDBY ||
|
pmState == PM_HOT_STANDBY ||
|
||||||
@ -3180,6 +3222,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||||||
pmState == PM_WAIT_READONLY ||
|
pmState == PM_WAIT_READONLY ||
|
||||||
pmState == PM_SHUTDOWN)
|
pmState == PM_SHUTDOWN)
|
||||||
pmState = PM_WAIT_BACKENDS;
|
pmState = PM_WAIT_BACKENDS;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* .. and if this doesn't happen quickly enough, now the clock is ticking
|
||||||
|
* for us to kill them without mercy.
|
||||||
|
*/
|
||||||
|
if (AbortStartTime == 0)
|
||||||
|
AbortStartTime = time(NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -3316,7 +3365,7 @@ PostmasterStateMachine(void)
|
|||||||
WalWriterPID == 0 &&
|
WalWriterPID == 0 &&
|
||||||
AutoVacPID == 0)
|
AutoVacPID == 0)
|
||||||
{
|
{
|
||||||
if (FatalError)
|
if (Shutdown >= ImmediateShutdown || FatalError)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Start waiting for dead_end children to die. This state
|
* Start waiting for dead_end children to die. This state
|
||||||
@ -3326,7 +3375,8 @@ PostmasterStateMachine(void)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* We already SIGQUIT'd the archiver and stats processes, if
|
* We already SIGQUIT'd the archiver and stats processes, if
|
||||||
* any, when we entered FatalError state.
|
* any, when we started immediate shutdown or entered
|
||||||
|
* FatalError state.
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -3511,6 +3561,7 @@ signal_child(pid_t pid, int signal)
|
|||||||
case SIGTERM:
|
case SIGTERM:
|
||||||
case SIGQUIT:
|
case SIGQUIT:
|
||||||
case SIGSTOP:
|
case SIGSTOP:
|
||||||
|
case SIGKILL:
|
||||||
if (kill(-pid, signal) < 0)
|
if (kill(-pid, signal) < 0)
|
||||||
elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
|
elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
|
||||||
break;
|
break;
|
||||||
@ -3597,6 +3648,33 @@ SignalSomeChildren(int signal, int target)
|
|||||||
return signaled;
|
return signaled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Send a termination signal to children. This considers all of our child
|
||||||
|
* processes, except syslogger and dead_end backends.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
TerminateChildren(int signal)
|
||||||
|
{
|
||||||
|
SignalChildren(signal);
|
||||||
|
if (StartupPID != 0)
|
||||||
|
signal_child(StartupPID, signal);
|
||||||
|
if (BgWriterPID != 0)
|
||||||
|
signal_child(BgWriterPID, signal);
|
||||||
|
if (CheckpointerPID != 0)
|
||||||
|
signal_child(CheckpointerPID, signal);
|
||||||
|
if (WalWriterPID != 0)
|
||||||
|
signal_child(WalWriterPID, signal);
|
||||||
|
if (WalReceiverPID != 0)
|
||||||
|
signal_child(WalReceiverPID, signal);
|
||||||
|
if (AutoVacPID != 0)
|
||||||
|
signal_child(AutoVacPID, signal);
|
||||||
|
if (PgArchPID != 0)
|
||||||
|
signal_child(PgArchPID, signal);
|
||||||
|
if (PgStatPID != 0)
|
||||||
|
signal_child(PgStatPID, signal);
|
||||||
|
SignalUnconnectedWorkers(signal);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BackendStartup -- start backend process
|
* BackendStartup -- start backend process
|
||||||
*
|
*
|
||||||
|
@ -38,6 +38,26 @@ pgkill(int pid, int sig)
|
|||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* special case for SIGKILL: just ask the system to terminate the target */
|
||||||
|
if (sig == SIGKILL)
|
||||||
|
{
|
||||||
|
HANDLE prochandle;
|
||||||
|
|
||||||
|
if ((prochandle = OpenProcess(PROCESS_TERMINATE, FALSE, (DWORD) pid)) == NULL)
|
||||||
|
{
|
||||||
|
errno = ESRCH;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (!TerminateProcess(prochandle, 255))
|
||||||
|
{
|
||||||
|
_dosmaperr(GetLastError());
|
||||||
|
CloseHandle(prochandle);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
CloseHandle(prochandle);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
snprintf(pipename, sizeof(pipename), "\\\\.\\pipe\\pgsignal_%u", pid);
|
snprintf(pipename, sizeof(pipename), "\\\\.\\pipe\\pgsignal_%u", pid);
|
||||||
|
|
||||||
if (CallNamedPipe(pipename, &sigData, 1, &sigRet, 1, &bytes, 1000))
|
if (CallNamedPipe(pipename, &sigData, 1, &sigRet, 1, &bytes, 1000))
|
||||||
|
Loading…
Reference in New Issue
Block a user