Teach RequestCheckpoint() to wait and retry a few times if it can't signal the
bgwriter immediately.  This covers the case where the bgwriter is still
starting up, as seen in a recent buildfarm failure.  In future it might also
assist with clean recovery after a bgwriter termination and restart ---
right now the postmaster treats early bgwriter exit as a system crash,
but that might not always be so.
This commit is contained in:
Tom Lane 2008-11-23 01:40:19 +00:00
parent 8309d006cb
commit 6f6a6d8b14

View File

@ -37,7 +37,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.53 2008/10/14 08:06:39 heikki Exp $ * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.54 2008/11/23 01:40:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -864,6 +864,7 @@ RequestCheckpoint(int flags)
{ {
/* use volatile pointer to prevent code rearrangement */ /* use volatile pointer to prevent code rearrangement */
volatile BgWriterShmemStruct *bgs = BgWriterShmem; volatile BgWriterShmemStruct *bgs = BgWriterShmem;
int ntries;
int old_failed, int old_failed,
old_started; old_started;
@ -905,15 +906,38 @@ RequestCheckpoint(int flags)
SpinLockRelease(&bgs->ckpt_lck); SpinLockRelease(&bgs->ckpt_lck);
/* /*
* Send signal to request checkpoint. When not waiting, we consider * Send signal to request checkpoint. It's possible that the bgwriter
* failure to send the signal to be nonfatal. * hasn't started yet, or is in process of restarting, so we will retry
* a few times if needed. Also, if not told to wait for the checkpoint
* to occur, we consider failure to send the signal to be nonfatal and
* merely LOG it.
*/ */
if (BgWriterShmem->bgwriter_pid == 0) for (ntries = 0; ; ntries++)
elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG, {
"could not request checkpoint because bgwriter not running"); if (BgWriterShmem->bgwriter_pid == 0)
if (kill(BgWriterShmem->bgwriter_pid, SIGINT) != 0) {
elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG, if (ntries >= 20) /* max wait 2.0 sec */
"could not signal for checkpoint: %m"); {
elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
"could not request checkpoint because bgwriter not running");
break;
}
}
else if (kill(BgWriterShmem->bgwriter_pid, SIGINT) != 0)
{
if (ntries >= 20) /* max wait 2.0 sec */
{
elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
"could not signal for checkpoint: %m");
break;
}
}
else
break; /* signal sent successfully */
CHECK_FOR_INTERRUPTS();
pg_usleep(100000L); /* wait 0.1 sec, then retry */
}
/* /*
* If requested, wait for completion. We detect completion according to * If requested, wait for completion. We detect completion according to