Avoid bogus "out-of-sequence timeline ID" errors in standby-mode.

When startup process opens a WAL segment after replaying part of it, it
validates the first page on the WAL segment, even though the page it's
really interested in later in the file. As part of the validation, it checks
that the TLI on the page header is >= the TLI it saw on the last page it
read. If the segment contains a timeline switch, and we have already
replayed it, and then re-open the WAL segment (because of streaming
replication got disconnected and reconnected, for example), the TLI check
will fail when the first page is validated. Fix that by relaxing the TLI
check when re-opening a WAL segment.

Backpatch to 9.0. Earlier versions had the same code, but before standby
mode was introduced in 9.0, recovery never tried to re-read a segment after
partially replaying it.

Reported by Amit Kapila, while testing a new feature.
This commit is contained in:
Heikki Linnakangas 2012-11-22 11:23:46 +02:00
parent 27b2c6a1ef
commit 24c19e6bf9

View File

@ -570,6 +570,7 @@ static uint32 readRecordBufSize = 0;
static XLogRecPtr ReadRecPtr; /* start of last record read */
static XLogRecPtr EndRecPtr; /* end+1 of last record read */
static TimeLineID lastPageTLI = 0;
static TimeLineID lastSegmentTLI = 0;
static XLogRecPtr minRecoveryPoint; /* local copy of
* ControlFile->minRecoveryPoint */
@ -644,7 +645,7 @@ static void CleanupBackupHistory(void);
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt);
static void CheckRecoveryConsistency(void);
static bool ValidXLogPageHeader(XLogPageHeader hdr, int emode);
static bool ValidXLogPageHeader(XLogPageHeader hdr, int emode, bool segmentonly);
static bool ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record,
int emode, bool randAccess);
static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
@ -3339,7 +3340,8 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
* to go backwards (but we can't reset that variable right here, since
* we might not change files at all).
*/
lastPageTLI = 0; /* see comment in ValidXLogPageHeader */
/* see comment in ValidXLogPageHeader */
lastPageTLI = lastSegmentTLI = 0;
randAccess = true; /* allow curFileTLI to go backwards too */
}
@ -3579,7 +3581,7 @@ next_record_is_invalid:
* ReadRecord. It's not intended for use from anywhere else.
*/
static bool
ValidXLogPageHeader(XLogPageHeader hdr, int emode)
ValidXLogPageHeader(XLogPageHeader hdr, int emode, bool segmentonly)
{
XLogRecPtr recaddr;
@ -3681,19 +3683,32 @@ ValidXLogPageHeader(XLogPageHeader hdr, int emode)
* successive pages of a consistent WAL sequence.
*
* Of course this check should only be applied when advancing sequentially
* across pages; therefore ReadRecord resets lastPageTLI to zero when
* going to a random page.
* across pages; therefore ReadRecord resets lastPageTLI and
* lastSegmentTLI to zero when going to a random page.
*
* Sometimes we re-open a segment that's already been partially replayed.
* In that case we cannot perform the normal TLI check: if there is a
* timeline switch within the segment, the first page has a smaller TLI
* than later pages following the timeline switch, and we might've read
* them already. As a weaker test, we still check that it's not smaller
* than the TLI we last saw at the beginning of a segment. Pass
* segmentonly = true when re-validating the first page like that, and the
* page you're actually interested in comes later.
*/
if (hdr->xlp_tli < lastPageTLI)
if (hdr->xlp_tli < (segmentonly ? lastSegmentTLI : lastPageTLI))
{
ereport(emode_for_corrupt_record(emode, recaddr),
(errmsg("out-of-sequence timeline ID %u (after %u) in log segment %s, offset %u",
hdr->xlp_tli, lastPageTLI,
hdr->xlp_tli,
segmentonly ? lastSegmentTLI : lastPageTLI,
XLogFileNameP(curFileTLI, readSegNo),
readOff)));
return false;
}
lastPageTLI = hdr->xlp_tli;
if (readOff == 0)
lastSegmentTLI = hdr->xlp_tli;
return true;
}
@ -9366,7 +9381,7 @@ retry:
fname, readOff)));
goto next_record_is_invalid;
}
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode))
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode, true))
goto next_record_is_invalid;
}
@ -9392,7 +9407,7 @@ retry:
fname, readOff)));
goto next_record_is_invalid;
}
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode))
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode, false))
goto next_record_is_invalid;
readFileHeaderValidated = true;