From 1c850fa80727180d03bdb6a8c2f672eeda7fa818 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 8 Apr 2010 01:39:37 +0000 Subject: [PATCH] Make smart shutdown work in combination with Hot Standby/Streaming Replication. At present, killing the startup process does not release any locks it holds, so we must wait to stop the startup and walreceiver processes until all read-only backends have exited. Without this patch, the startup and walreceiver processes never exit, so the server gets permanently stuck in a half-shutdown state. Fujii Masao, with review, docs, and comment adjustments by me. --- doc/src/sgml/ref/pg_ctl-ref.sgml | 4 +++- doc/src/sgml/runtime.sgml | 6 +++-- src/backend/postmaster/postmaster.c | 37 +++++++++++++++++++++++++++-- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml index e11e59a3fa..99d9eb0ca7 100644 --- a/doc/src/sgml/ref/pg_ctl-ref.sgml +++ b/doc/src/sgml/ref/pg_ctl-ref.sgml @@ -1,5 +1,5 @@ @@ -152,6 +152,8 @@ PostgreSQL documentation shutdown methods can be selected with the option: Smart mode waits for online backup mode to finish and all the clients to disconnect. This is the default. + If the server is in recovery, recovery and streaming replication + will be terminated once all clients have disconnected. Fast mode does not wait for clients to disconnect and will terminate an online backup in progress. All active transactions are rolled back and clients are forcibly disconnected, then the diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index fd84d67f51..cb76b1d7b5 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1,4 +1,4 @@ - + Server Setup and Operation @@ -1338,7 +1338,9 @@ echo -17 > /proc/self/oom_adj until online backup mode is no longer active. 
While backup mode is active, new connections will still be allowed, but only to superusers (this exception allows a superuser to connect to terminate - online backup mode). + online backup mode). If the server is in recovery when a smart + shutdown is requested, recovery and streaming replication will be + stopped only after all regular sessions have terminated. diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 43ecfac65b..d77ffd0743 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.604 2010/03/25 20:40:17 sriggs Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.605 2010/04/08 01:39:37 rhaas Exp $ * * NOTES * @@ -278,6 +278,7 @@ typedef enum PM_RECOVERY_CONSISTENT, /* consistent recovery mode */ PM_RUN, /* normal "database is alive" state */ PM_WAIT_BACKUP, /* waiting for online backup mode to end */ + PM_WAIT_READONLY, /* waiting for read-only backends to exit */ PM_WAIT_BACKENDS, /* waiting for live backends to exit */ PM_SHUTDOWN, /* waiting for bgwriter to do shutdown ckpt */ PM_SHUTDOWN_2, /* waiting for archiver and walsenders to @@ -2173,7 +2174,17 @@ pmdie(SIGNAL_ARGS) /* and the walwriter too */ if (WalWriterPID != 0) signal_child(WalWriterPID, SIGTERM); - pmState = PM_WAIT_BACKUP; + /* + * If we're in recovery, we can't kill the startup process + * right away, because at present doing so does not release + * its locks. We might want to change this in a future + * release. For the time being, the PM_WAIT_READONLY state + * indicates that we're waiting for the regular (read-only) + * backends to die off; once they do, we'll kill the startup + * and walreceiver processes. + */ + pmState = (pmState == PM_RUN) ? 
+ PM_WAIT_BACKUP : PM_WAIT_READONLY; } /* @@ -2209,6 +2220,7 @@ pmdie(SIGNAL_ARGS) } if (pmState == PM_RUN || pmState == PM_WAIT_BACKUP || + pmState == PM_WAIT_READONLY || pmState == PM_WAIT_BACKENDS || pmState == PM_RECOVERY_CONSISTENT) { @@ -2771,6 +2783,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) pmState == PM_RECOVERY_CONSISTENT || pmState == PM_RUN || pmState == PM_WAIT_BACKUP || + pmState == PM_WAIT_READONLY || pmState == PM_SHUTDOWN) pmState = PM_WAIT_BACKENDS; } @@ -2846,6 +2859,26 @@ PostmasterStateMachine(void) pmState = PM_WAIT_BACKENDS; } + if (pmState == PM_WAIT_READONLY) + { + /* + * PM_WAIT_READONLY state ends when we have no regular backends that + * have been started during recovery. We kill the startup and + * walreceiver processes and transition to PM_WAIT_BACKENDS. Ideally, + * we might like to kill these processes first and then wait for + * backends to die off, but that doesn't work at present because + * killing the startup process doesn't release its locks. + */ + if (CountChildren(BACKEND_TYPE_NORMAL) == 0) + { + if (StartupPID != 0) + signal_child(StartupPID, SIGTERM); + if (WalReceiverPID != 0) + signal_child(WalReceiverPID, SIGTERM); + pmState = PM_WAIT_BACKENDS; + } + } + /* * If we are in a state-machine state that implies waiting for backends to * exit, see if they're all gone, and change state if so.