From f40022f1adaeff85b01d98fea38cf5aa16814aa7 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 10 May 2012 14:34:22 -0400 Subject: [PATCH] Make WaitLatch's WL_POSTMASTER_DEATH result trustworthy; simplify callers. Per a suggestion from Peter Geoghegan, make WaitLatch responsible for verifying that the WL_POSTMASTER_DEATH bit it returns is truthful (by testing PostmasterIsAlive). Then simplify its callers, who no longer need to do that for themselves. Remove weasel wording about falsely-set result bits from WaitLatch's API contract. --- src/backend/port/unix_latch.c | 35 ++++++++++++++++++--------- src/backend/port/win32_latch.c | 12 +++++++-- src/backend/postmaster/autovacuum.c | 9 ++++--- src/backend/postmaster/bgwriter.c | 7 ++---- src/backend/postmaster/checkpointer.c | 7 ++---- src/backend/postmaster/pgstat.c | 7 ++---- src/backend/postmaster/walwriter.c | 7 ++---- 7 files changed, 47 insertions(+), 37 deletions(-) diff --git a/src/backend/port/unix_latch.c b/src/backend/port/unix_latch.c index 10bf2dbec7..30d1a488f0 100644 --- a/src/backend/port/unix_latch.c +++ b/src/backend/port/unix_latch.c @@ -50,6 +50,7 @@ #include "miscadmin.h" #include "postmaster/postmaster.h" #include "storage/latch.h" +#include "storage/pmsignal.h" #include "storage/shmem.h" /* Are we currently in WaitLatch? The signal handler would like to know. */ @@ -160,15 +161,7 @@ DisownLatch(volatile Latch *latch) * * Returns bit mask indicating which condition(s) caused the wake-up. Note * that if multiple wake-up conditions are true, there is no guarantee that - * we return all of them in one call, but we will return at least one. Also, - * according to the select(2) man page on Linux, select(2) may spuriously - * return and report a file descriptor as readable, when it's not. We use - * select(2), so WaitLatch can also spuriously claim that a socket is - * readable, or postmaster has died, even when none of the wake conditions - * have been satisfied. That should be rare in practice, but the caller - * should not use the return value for anything critical, re-checking the - * situation with PostmasterIsAlive() or read() on a socket as necessary. - * The latch and timeout flag bits can be trusted, however. + * we return all of them in one call, but we will return at least one. */ int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout) @@ -318,7 +311,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, if ((wakeEvents & WL_POSTMASTER_DEATH) && (pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL))) { - result |= WL_POSTMASTER_DEATH; + /* + * According to the select(2) man page on Linux, select(2) may + * spuriously return and report a file descriptor as readable, + * when it's not; and presumably so can poll(2). It's not clear + * that the relevant cases would ever apply to the postmaster + * pipe, but since the consequences of falsely returning + * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the + * trouble to positively verify EOF with PostmasterIsAlive(). + */ + if (!PostmasterIsAlive()) + result |= WL_POSTMASTER_DEATH; } #else /* !HAVE_POLL */ @@ -380,7 +383,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, if ((wakeEvents & WL_POSTMASTER_DEATH) && FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask)) { - result |= WL_POSTMASTER_DEATH; + /* + * According to the select(2) man page on Linux, select(2) may + * spuriously return and report a file descriptor as readable, + * when it's not; and presumably so can poll(2). It's not clear + * that the relevant cases would ever apply to the postmaster + * pipe, but since the consequences of falsely returning + * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the + * trouble to positively verify EOF with PostmasterIsAlive(). + */ + if (!PostmasterIsAlive()) + result |= WL_POSTMASTER_DEATH; } #endif /* HAVE_POLL */ } while (result == 0); diff --git a/src/backend/port/win32_latch.c b/src/backend/port/win32_latch.c index e4fe05fa4b..622798924d 100644 --- a/src/backend/port/win32_latch.c +++ b/src/backend/port/win32_latch.c @@ -26,6 +26,7 @@ #include "miscadmin.h" #include "postmaster/postmaster.h" #include "storage/latch.h" +#include "storage/pmsignal.h" #include "storage/shmem.h" @@ -217,8 +218,15 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, else if ((wakeEvents & WL_POSTMASTER_DEATH) && rc == WAIT_OBJECT_0 + pmdeath_eventno) { - /* Postmaster died */ - result |= WL_POSTMASTER_DEATH; + /* + * Postmaster apparently died. Since the consequences of falsely + * returning WL_POSTMASTER_DEATH could be pretty unpleasant, we + * take the trouble to positively verify this with + * PostmasterIsAlive(), even though there is no known reason to + * think that the event could be falsely set on Windows. + */ + if (!PostmasterIsAlive()) + result |= WL_POSTMASTER_DEATH; } else elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc); diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 49c15c3261..fa9a115945 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -574,6 +574,7 @@ AutoVacLauncherMain(int argc, char *argv[]) TimestampTz current_time = 0; bool can_launch; Dlelem *elem; + int rc; /* * This loop is a bit different from the normal use of WaitLatch, @@ -592,9 +593,9 @@ AutoVacLauncherMain(int argc, char *argv[]) * Wait until naptime expires or we get some type of signal (all the * signal handlers will wake us by calling SetLatch). */ - WaitLatch(&MyProc->procLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, - (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L)); + rc = WaitLatch(&MyProc->procLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L)); ResetLatch(&MyProc->procLatch); @@ -604,7 +605,7 @@ AutoVacLauncherMain(int argc, char *argv[]) * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ - if (!PostmasterIsAlive()) + if (rc & WL_POSTMASTER_DEATH) proc_exit(1); /* the normal shutdown case */ diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index abb665ee50..5daf8c6177 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -48,7 +48,6 @@ #include "storage/buf_internals.h" #include "storage/ipc.h" #include "storage/lwlock.h" -#include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/shmem.h" #include "storage/smgr.h" @@ -323,11 +322,9 @@ BackgroundWriterMain(void) /* * Emergency bailout if postmaster has died. This is to avoid the - * necessity for manual cleanup of all postmaster children. Note - * that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely; - * if it is set, recheck with PostmasterIsAlive before believing it. + * necessity for manual cleanup of all postmaster children. */ - if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive()) + if (rc & WL_POSTMASTER_DEATH) exit(1); prev_hibernate = can_hibernate; diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 7f8ba95c44..5db60cd5d9 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -50,7 +50,6 @@ #include "storage/bufmgr.h" #include "storage/ipc.h" #include "storage/lwlock.h" -#include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/shmem.h" #include "storage/smgr.h" @@ -581,11 +580,9 @@ CheckpointerMain(void) /* * Emergency bailout if postmaster has died. This is to avoid the - * necessity for manual cleanup of all postmaster children. Note - * that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely; - * if it is set, recheck with PostmasterIsAlive before believing it. + * necessity for manual cleanup of all postmaster children. */ - if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive()) + if (rc & WL_POSTMASTER_DEATH) exit(1); } } diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 15002dcd9d..01273e1934 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -51,7 +51,6 @@ #include "storage/ipc.h" #include "storage/latch.h" #include "storage/pg_shmem.h" -#include "storage/pmsignal.h" #include "storage/procsignal.h" #include "utils/ascii.h" #include "utils/guc.h" @@ -3227,11 +3226,9 @@ PgstatCollectorMain(int argc, char *argv[]) /* * Emergency bailout if postmaster has died. This is to avoid the - * necessity for manual cleanup of all postmaster children. Note - * that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely; - * if it is set, recheck with PostmasterIsAlive before believing it. + * necessity for manual cleanup of all postmaster children. */ - if ((wr & WL_POSTMASTER_DEATH) && !PostmasterIsAlive()) + if (wr & WL_POSTMASTER_DEATH) break; } /* end of outer loop */ diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c index 886ad4f926..77455db166 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -53,7 +53,6 @@ #include "storage/bufmgr.h" #include "storage/ipc.h" #include "storage/lwlock.h" -#include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/smgr.h" #include "utils/guc.h" @@ -305,11 +304,9 @@ WalWriterMain(void) /* * Emergency bailout if postmaster has died. This is to avoid the - * necessity for manual cleanup of all postmaster children. Note - * that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely; - * if it is set, recheck with PostmasterIsAlive before believing it. + * necessity for manual cleanup of all postmaster children. */ - if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive()) + if (rc & WL_POSTMASTER_DEATH) exit(1); } }