diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 1f8d2d6bdbb..59c539a4e0e 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -51,6 +51,7 @@ #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/pmsignal.h" +#include "storage/proc.h" #include "storage/shmem.h" #include "storage/smgr.h" #include "storage/spin.h" @@ -64,6 +65,11 @@ */ int BgWriterDelay = 200; +/* + * Time to sleep between bgwriter rounds, when it has no work to do. + */ +#define BGWRITER_HIBERNATE_MS 10000 + /* * Flags set by interrupt handlers for later service in the main loop. */ @@ -77,13 +83,14 @@ static bool am_bg_writer = false; /* Prototypes for private functions */ -static void BgWriterNap(void); +static void BgWriterNap(bool hibernating); /* Signal handlers */ static void bg_quickdie(SIGNAL_ARGS); static void BgSigHupHandler(SIGNAL_ARGS); static void ReqShutdownHandler(SIGNAL_ARGS); +static void bgwriter_sigusr1_handler(SIGNAL_ARGS); /* @@ -97,6 +104,7 @@ BackgroundWriterMain(void) { sigjmp_buf local_sigjmp_buf; MemoryContext bgwriter_context; + bool hibernating; am_bg_writer = true; @@ -112,10 +120,10 @@ BackgroundWriterMain(void) #endif /* - * Properly accept or ignore signals the postmaster might send us + * Properly accept or ignore signals the postmaster might send us. * - * SIGUSR1 is presently unused; keep it spare in case someday we want this - * process to participate in ProcSignal signalling. + * bgwriter doesn't participate in ProcSignal signalling, but a SIGUSR1 + * handler is still needed for latch wakeups. 
*/ pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */ pqsignal(SIGINT, SIG_IGN); /* as of 9.2 no longer requests checkpoint */ @@ -123,7 +131,7 @@ BackgroundWriterMain(void) pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); - pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */ + pqsignal(SIGUSR1, bgwriter_sigusr1_handler); pqsignal(SIGUSR2, SIG_IGN); /* @@ -138,6 +146,12 @@ BackgroundWriterMain(void) /* We allow SIGQUIT (quickdie) at all times */ sigdelset(&BlockSig, SIGQUIT); + /* + * Advertise our latch that backends can use to wake us up while we're + * sleeping. + */ + ProcGlobal->bgwriterLatch = &MyProc->procLatch; + /* * Create a resource owner to keep track of our resources (currently only * buffer pins). @@ -235,8 +249,11 @@ BackgroundWriterMain(void) /* * Loop forever */ + hibernating = false; for (;;) { + bool lapped; + /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. @@ -264,18 +281,66 @@ BackgroundWriterMain(void) /* * Do one cycle of dirty-buffer writing. */ - BgBufferSync(); + if (hibernating && bgwriter_lru_maxpages > 0) + ResetLatch(&MyProc->procLatch); + lapped = BgBufferSync(); - /* Nap for the configured time. */ - BgWriterNap(); + if (lapped && !hibernating) + { + /* + * BgBufferSync did nothing. Since there doesn't seem to be any + * work for the bgwriter to do, go into slower-paced + * "hibernation" mode, where we sleep for much longer times than + * bgwriter_delay says. Fewer wakeups saves electricity. If a + * backend starts dirtying pages again, it will wake us up by + * setting our latch. + * + * The latch is kept set during productive cycles where buffers + * are written, and only reset before going into a longer sleep. 
+ * That ensures that when there's a constant trickle of activity, + * the SetLatch() calls that backends have to do will see the + * latch as already set, and are not slowed down by having to + * actually set the latch and signal us. + */ + hibernating = true; + + /* + * Take one more short nap and perform one more bgwriter cycle - + * someone might've dirtied a buffer just after we finished the + * previous bgwriter cycle, while the latch was still set. If + * we still find nothing to do after this cycle, the next sleep + * will be longer. + */ + BgWriterNap(false); + continue; + } + else if (!lapped && hibernating) + { + /* + * Woken up from hibernation. Set the latch just in case it's + * not set yet (usually we wake up from hibernation because a + * backend already set the latch, but not necessarily). + */ + SetLatch(&MyProc->procLatch); + hibernating = false; + } + + /* + * Take a short or long nap, depending on whether there was any work + * to do. + */ + BgWriterNap(hibernating); } } /* * BgWriterNap -- Nap for the configured time or until a signal is received. + * + * If 'hibernating' is false, sleeps for bgwriter_delay milliseconds. + * Otherwise sleeps longer, but also wakes up if the process latch is set. */ static void -BgWriterNap(void) +BgWriterNap(bool hibernating) { long udelay; @@ -285,18 +350,44 @@ BgWriterNap(void) pgstat_send_bgwriter(); /* - * Nap for the configured time, or sleep for 10 seconds if there is no - * bgwriter activity configured. + * If there was no work to do in the previous bgwriter cycle, take a + * longer nap. + */ + if (hibernating) + { + /* + * We wake on a buffer being dirtied. It's possible that some + * useful work will become available for the bgwriter to do without + * a buffer actually being dirtied, like when a dirty buffer's usage + * count is decremented to zero or it's unpinned. 
This corner case + * is judged as too marginal to justify adding additional SetLatch() + * calls in very hot code paths, cheap though those calls may be. + * + * We still wake up periodically, so that BufferAlloc stats are + * updated reasonably promptly. + */ + int res = WaitLatch(&MyProc->procLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + BGWRITER_HIBERNATE_MS); + + /* + * Return immediately only on timeout or postmaster death. If the latch + * woke us, WaitLatch() may have returned instantly, so fall through and + * nap normally to keep BgWriterDelay ms between BgBufferSync calls. + */ + if (res & (WL_TIMEOUT | WL_POSTMASTER_DEATH)) + return; + } + + /* + * Nap for the configured time. * * On some platforms, signals won't interrupt the sleep. To ensure we * respond reasonably promptly when someone signals us, break down the * sleep into 1-second increments, and check for interrupts after each * nap. */ - if (bgwriter_lru_maxpages > 0) - udelay = BgWriterDelay * 1000L; - else - udelay = 10000000L; /* Ten seconds */ + udelay = BgWriterDelay * 1000L; while (udelay > 999999L) { @@ -351,12 +442,35 @@ bg_quickdie(SIGNAL_ARGS) static void BgSigHupHandler(SIGNAL_ARGS) { + int save_errno = errno; + got_SIGHUP = true; + if (MyProc) + SetLatch(&MyProc->procLatch); /* interrupt a hibernation WaitLatch() */ + + errno = save_errno; } /* SIGTERM: set flag to shutdown and exit */ static void ReqShutdownHandler(SIGNAL_ARGS) { + int save_errno = errno; + shutdown_requested = true; + if (MyProc) + SetLatch(&MyProc->procLatch); /* interrupt a hibernation WaitLatch() */ + + errno = save_errno; +} + +/* SIGUSR1: latch wakeups only; calls latch_sigusr1_handler(), preserving errno */ +static void +bgwriter_sigusr1_handler(SIGNAL_ARGS) +{ + int save_errno = errno; + + latch_sigusr1_handler(); + + errno = save_errno; } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 8f68bcc66d9..1adb6d360dd 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -953,6 +953,7 @@ void MarkBufferDirty(Buffer buffer) { volatile BufferDesc *bufHdr;
+ bool dirtied = false; if (!BufferIsValid(buffer)) elog(ERROR, "bad buffer ID: %d", buffer); @@ -973,19 +974,25 @@ MarkBufferDirty(Buffer buffer) Assert(bufHdr->refcount > 0); - /* - * If the buffer was not dirty already, do vacuum accounting. - */ if (!(bufHdr->flags & BM_DIRTY)) - { - VacuumPageDirty++; - if (VacuumCostActive) - VacuumCostBalance += VacuumCostPageDirty; - } + dirtied = true; bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); UnlockBufHdr(bufHdr); + + /* + * If the buffer was not dirty already, do vacuum accounting, and + * nudge bgwriter. + */ + if (dirtied) + { + VacuumPageDirty++; + if (VacuumCostActive) + VacuumCostBalance += VacuumCostPageDirty; + if (ProcGlobal->bgwriterLatch) + SetLatch(ProcGlobal->bgwriterLatch); + } } /* @@ -1307,8 +1314,12 @@ BufferSync(int flags) * BgBufferSync -- Write out some dirty buffers in the pool. * * This is called periodically by the background writer process. + * + * Returns true if the clocksweep has been "lapped", so that there's nothing + * to do. Also returns true if there's nothing to do because bgwriter was + * effectively disabled by setting bgwriter_lru_maxpages to 0. */ -void +bool BgBufferSync(void) { /* info obtained from freelist.c */ @@ -1365,7 +1376,7 @@ BgBufferSync(void) if (bgwriter_lru_maxpages <= 0) { saved_info_valid = false; - return; + return true; } /* @@ -1584,6 +1595,8 @@ BgBufferSync(void) recent_alloc, strategy_delta, scans_per_alloc, smoothed_density); #endif } + + return (bufs_to_lap == 0); } /* @@ -2341,16 +2354,24 @@ SetBufferCommitInfoNeedsSave(Buffer buffer) if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) != (BM_DIRTY | BM_JUST_DIRTIED)) { + bool dirtied = false; + LockBufHdr(bufHdr); Assert(bufHdr->refcount > 0); if (!(bufHdr->flags & BM_DIRTY)) + dirtied = true; + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); + UnlockBufHdr(bufHdr); + + if (dirtied) { VacuumPageDirty++; if (VacuumCostActive) VacuumCostBalance += VacuumCostPageDirty; + /* The bgwriter may need to be woken. 
*/ + if (ProcGlobal->bgwriterLatch) + SetLatch(ProcGlobal->bgwriterLatch); } - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - UnlockBufHdr(bufHdr); } } diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index a03c06874b7..de1bbd01d83 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -213,7 +213,7 @@ extern bool HoldingBufferPinThatDelaysRecovery(void); extern void AbortBufferIO(void); extern void BufmgrCommit(void); -extern void BgBufferSync(void); +extern bool BgBufferSync(void); extern void AtProcExit_LocalBuffers(void); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 358d1a456cb..b68ae39268e 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -188,6 +188,8 @@ typedef struct PROC_HDR PGPROC *freeProcs; /* Head of list of autovacuum's free PGPROC structures */ PGPROC *autovacFreeProcs; + /* bgwriter process latch; backends set it to wake bgwriter, if non-NULL */ + Latch *bgwriterLatch; /* Current shared estimate of appropriate spins_per_delay value */ int spins_per_delay; /* The proc of the Startup process, since not in ProcArray */