Make smart shutdown work in combination with Hot Standby/Streaming Replication.

At present, killing the startup process does not release any locks it holds, so we must not stop the startup and walreceiver processes until all read-only backends have exited. Without this patch, the startup and walreceiver processes never exit, so the server gets permanently stuck in a half-shutdown state. Fujii Masao, with review, docs, and comment adjustments by me.
2024-12-27 08:39:28 +08:00 · 2010-04-08 01:39:37 +00:00 · 2010-04-08 01:39:37 +00:00 · 1c850fa807
commit 1c850fa807
parent 2c0870ff7a
3 changed files with 42 additions and 5 deletions
--- a/doc/src/sgml/ref/pg_ctl-ref.sgml
+++ b/doc/src/sgml/ref/pg_ctl-ref.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/pg_ctl-ref.sgml,v 1.49 2010/04/03 07:23:01 petere Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/pg_ctl-ref.sgml,v 1.50 2010/04/08 01:39:37 rhaas Exp $
 PostgreSQL documentation
 -->
@@ -152,6 +152,8 @@ PostgreSQL documentation
   shutdown methods can be selected with the <option>-m</option>
   option: <quote>Smart</quote> mode waits for online backup mode
   to finish and all the clients to disconnect.  This is the default.
+  If the server is in recovery, recovery and streaming replication
+  will be terminated once all clients have disconnected.
   <quote>Fast</quote> mode does not wait for clients to disconnect and
   will terminate an online backup in progress.  All active transactions are
   rolled back and clients are forcibly disconnected, then the
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.433 2010/03/21 00:43:40 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.434 2010/04/08 01:39:37 rhaas Exp $ -->
 <chapter Id="runtime">
 <title>Server Setup and Operation</title>
@@ -1338,7 +1338,9 @@ echo -17 > /proc/self/oom_adj
       until online backup mode is no longer active.  While backup mode is
       active, new connections will still be allowed, but only to superusers
       (this exception allows a superuser to connect to terminate
-       online backup mode).
+       online backup mode).  If the server is in recovery when a smart
+       shutdown is requested, recovery and streaming replication will be
+       stopped only after all regular sessions have terminated.
      </para>
     </listitem>
    </varlistentry>
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -37,7 +37,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.604 2010/03/25 20:40:17 sriggs Exp $
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.605 2010/04/08 01:39:37 rhaas Exp $
 *
 * NOTES
 *
@@ -278,6 +278,7 @@ typedef enum
 	PM_RECOVERY_CONSISTENT,		/* consistent recovery mode */
 	PM_RUN,						/* normal "database is alive" state */
 	PM_WAIT_BACKUP,				/* waiting for online backup mode to end */
+	PM_WAIT_READONLY,			/* waiting for read only backends to exit */
 	PM_WAIT_BACKENDS,			/* waiting for live backends to exit */
 	PM_SHUTDOWN,				/* waiting for bgwriter to do shutdown ckpt */
 	PM_SHUTDOWN_2,				/* waiting for archiver and walsenders to
@@ -2173,7 +2174,17 @@ pmdie(SIGNAL_ARGS)
 				/* and the walwriter too */
 				if (WalWriterPID != 0)
 					signal_child(WalWriterPID, SIGTERM);
-				pmState = PM_WAIT_BACKUP;
+				/*
+				 * If we're in recovery, we can't kill the startup process
+				 * right away, because at present doing so does not release
+				 * its locks.  We might want to change this in a future
+				 * release.  For the time being, the PM_WAIT_READONLY state
+				 * indicates that we're waiting for the regular (read only)
+				 * backends to die off; once they do, we'll kill the startup
+				 * and walreceiver processes.
+				 */
+				pmState = (pmState == PM_RUN) ?
+					PM_WAIT_BACKUP : PM_WAIT_READONLY;
 			}
 			/*
@@ -2209,6 +2220,7 @@ pmdie(SIGNAL_ARGS)
 			}
 			if (pmState == PM_RUN ||
 				pmState == PM_WAIT_BACKUP ||
+				pmState == PM_WAIT_READONLY ||
 				pmState == PM_WAIT_BACKENDS ||
 				pmState == PM_RECOVERY_CONSISTENT)
 			{
@@ -2771,6 +2783,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 		pmState == PM_RECOVERY_CONSISTENT ||
 		pmState == PM_RUN ||
 		pmState == PM_WAIT_BACKUP ||
+		pmState == PM_WAIT_READONLY ||
 		pmState == PM_SHUTDOWN)
 		pmState = PM_WAIT_BACKENDS;
 }
@@ -2846,6 +2859,26 @@ PostmasterStateMachine(void)
 			pmState = PM_WAIT_BACKENDS;
 	}
+	if (pmState == PM_WAIT_READONLY)
+	{
+		/*
+		 * PM_WAIT_READONLY state ends when we have no regular backends that
+		 * have been started during recovery.  We kill the startup and
+		 * walreceiver processes and transition to PM_WAIT_BACKENDS.  Ideally,
+		 * we might like to kill these processes first and then wait for
+		 * backends to die off, but that doesn't work at present because
+		 * killing the startup process doesn't release its locks.
+		 */
+		if (CountChildren(BACKEND_TYPE_NORMAL) == 0)
+		{
+			if (StartupPID != 0)
+				signal_child(StartupPID, SIGTERM);
+			if (WalReceiverPID != 0)
+				signal_child(WalReceiverPID, SIGTERM);
+			pmState = PM_WAIT_BACKENDS;
+		}
+	}
 	/*
 	 * If we are in a state-machine state that implies waiting for backends to
 	 * exit, see if they're all gone, and change state if so.