mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-21 08:29:39 +08:00
Allow Hot Standby to begin from a shutdown checkpoint.
Patch by Simon Riggs & me
This commit is contained in:
parent
ea9c103237
commit
361bd1662e
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.59 2010/02/26 02:00:34 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.60 2010/04/13 14:17:46 heikki Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Each global transaction is associated with a global transaction
|
||||
@ -1718,6 +1718,89 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* StandbyRecoverPreparedTransactions
|
||||
*
|
||||
* Scan the pg_twophase directory and setup all the required information to
|
||||
* allow standby queries to treat prepared transactions as still active.
|
||||
* This is never called at the end of recovery - we use
|
||||
* RecoverPreparedTransactions() at that point.
|
||||
*
|
||||
* Currently we simply call SubTransSetParent() for any subxids of prepared
|
||||
* transactions. If overwriteOK is true, it's OK if some XIDs have already
|
||||
* been marked in pg_subtrans.
|
||||
*/
|
||||
void
|
||||
StandbyRecoverPreparedTransactions(bool overwriteOK)
|
||||
{
|
||||
DIR *cldir;
|
||||
struct dirent *clde;
|
||||
|
||||
cldir = AllocateDir(TWOPHASE_DIR);
|
||||
while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
|
||||
{
|
||||
if (strlen(clde->d_name) == 8 &&
|
||||
strspn(clde->d_name, "0123456789ABCDEF") == 8)
|
||||
{
|
||||
TransactionId xid;
|
||||
char *buf;
|
||||
TwoPhaseFileHeader *hdr;
|
||||
TransactionId *subxids;
|
||||
int i;
|
||||
|
||||
xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
|
||||
|
||||
/* Already processed? */
|
||||
if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
|
||||
{
|
||||
ereport(WARNING,
|
||||
(errmsg("removing stale two-phase state file \"%s\"",
|
||||
clde->d_name)));
|
||||
RemoveTwoPhaseFile(xid, true);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Read and validate file */
|
||||
buf = ReadTwoPhaseFile(xid, true);
|
||||
if (buf == NULL)
|
||||
{
|
||||
ereport(WARNING,
|
||||
(errmsg("removing corrupt two-phase state file \"%s\"",
|
||||
clde->d_name)));
|
||||
RemoveTwoPhaseFile(xid, true);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Deconstruct header */
|
||||
hdr = (TwoPhaseFileHeader *) buf;
|
||||
if (!TransactionIdEquals(hdr->xid, xid))
|
||||
{
|
||||
ereport(WARNING,
|
||||
(errmsg("removing corrupt two-phase state file \"%s\"",
|
||||
clde->d_name)));
|
||||
RemoveTwoPhaseFile(xid, true);
|
||||
pfree(buf);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Examine subtransaction XIDs ... they should all follow main
|
||||
* XID.
|
||||
*/
|
||||
subxids = (TransactionId *)
|
||||
(buf + MAXALIGN(sizeof(TwoPhaseFileHeader)));
|
||||
for (i = 0; i < hdr->nsubxacts; i++)
|
||||
{
|
||||
TransactionId subxid = subxids[i];
|
||||
|
||||
Assert(TransactionIdFollows(subxid, xid));
|
||||
SubTransSetParent(xid, subxid, overwriteOK);
|
||||
}
|
||||
}
|
||||
}
|
||||
FreeDir(cldir);
|
||||
}
|
||||
|
||||
/*
|
||||
* RecoverPreparedTransactions
|
||||
*
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.393 2010/04/12 10:40:42 heikki Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.394 2010/04/13 14:17:46 heikki Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -496,6 +496,7 @@ static TimeLineID lastPageTLI = 0;
|
||||
static XLogRecPtr minRecoveryPoint; /* local copy of
|
||||
* ControlFile->minRecoveryPoint */
|
||||
static bool updateMinRecoveryPoint = true;
|
||||
static bool reachedMinRecoveryPoint = false;
|
||||
|
||||
static bool InRedo = false;
|
||||
|
||||
@ -551,6 +552,7 @@ static void ValidateXLOGDirectoryStructure(void);
|
||||
static void CleanupBackupHistory(void);
|
||||
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
|
||||
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt);
|
||||
static void CheckRecoveryConsistency(void);
|
||||
static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
|
||||
static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
|
||||
static List *readTimeLineHistory(TimeLineID targetTLI);
|
||||
@ -5591,7 +5593,6 @@ StartupXLOG(void)
|
||||
uint32 freespace;
|
||||
TransactionId oldestActiveXID;
|
||||
bool bgwriterLaunched = false;
|
||||
bool backendsAllowed = false;
|
||||
|
||||
/*
|
||||
* Read control file and check XLOG status looks valid.
|
||||
@ -5838,6 +5839,8 @@ StartupXLOG(void)
|
||||
if (InRecovery)
|
||||
{
|
||||
int rmid;
|
||||
/* use volatile pointer to prevent code rearrangement */
|
||||
volatile XLogCtlData *xlogctl = XLogCtl;
|
||||
|
||||
/*
|
||||
* Update pg_control to show that we are recovering and to show the
|
||||
@ -5930,6 +5933,33 @@ StartupXLOG(void)
|
||||
StartupMultiXact();
|
||||
|
||||
ProcArrayInitRecoveryInfo(oldestActiveXID);
|
||||
|
||||
/*
|
||||
* If we're beginning at a shutdown checkpoint, we know that
|
||||
* nothing was running on the master at this point. So fake-up
|
||||
* an empty running-xacts record and use that here and now.
|
||||
* Recover additional standby state for prepared transactions.
|
||||
*/
|
||||
if (wasShutdown)
|
||||
{
|
||||
RunningTransactionsData running;
|
||||
|
||||
/*
|
||||
* Construct a RunningTransactions snapshot representing a shut
|
||||
* down server, with only prepared transactions still alive.
|
||||
* We're never overflowed at this point because all subxids
|
||||
* are listed with their parent prepared transactions.
|
||||
*/
|
||||
running.xcnt = nxids;
|
||||
running.subxid_overflow = false;
|
||||
running.nextXid = checkPoint.nextXid;
|
||||
running.oldestRunningXid = oldestActiveXID;
|
||||
running.xids = xids;
|
||||
|
||||
ProcArrayApplyRecoveryInfo(&running);
|
||||
|
||||
StandbyRecoverPreparedTransactions(false);
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize resource managers */
|
||||
@ -5939,6 +5969,46 @@ StartupXLOG(void)
|
||||
RmgrTable[rmid].rm_startup();
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize shared replayEndRecPtr and recoveryLastRecPtr.
|
||||
*
|
||||
* This is slightly confusing if we're starting from an online
|
||||
* checkpoint; we've just read and replayed the checkpoint record,
|
||||
* but we're going to start replay from its redo pointer, which
|
||||
* precedes the location of the checkpoint record itself. So even
|
||||
* though the last record we've replayed is indeed ReadRecPtr, we
|
||||
* haven't replayed all the preceding records yet. That's OK for
|
||||
* the current use of these variables.
|
||||
*/
|
||||
SpinLockAcquire(&xlogctl->info_lck);
|
||||
xlogctl->replayEndRecPtr = ReadRecPtr;
|
||||
xlogctl->recoveryLastRecPtr = ReadRecPtr;
|
||||
SpinLockRelease(&xlogctl->info_lck);
|
||||
|
||||
/*
|
||||
* Let postmaster know we've started redo now, so that it can
|
||||
* launch bgwriter to perform restartpoints. We don't bother
|
||||
* during crash recovery as restartpoints can only be performed
|
||||
* during archive recovery. And we'd like to keep crash recovery
|
||||
* simple, to avoid introducing bugs that could prevent you from
|
||||
* recovering after crash.
|
||||
*
|
||||
* After this point, we can no longer assume that we're the only
|
||||
* process in addition to postmaster! Also, fsync requests are
|
||||
* subsequently to be handled by the bgwriter, not locally.
|
||||
*/
|
||||
if (InArchiveRecovery && IsUnderPostmaster)
|
||||
{
|
||||
SetForwardFsyncRequests();
|
||||
SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
|
||||
bgwriterLaunched = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow read-only connections immediately if we're consistent already.
|
||||
*/
|
||||
CheckRecoveryConsistency();
|
||||
|
||||
/*
|
||||
* Find the first record that logically follows the checkpoint --- it
|
||||
* might physically precede it, though.
|
||||
@ -5958,43 +6028,14 @@ StartupXLOG(void)
|
||||
{
|
||||
bool recoveryContinue = true;
|
||||
bool recoveryApply = true;
|
||||
bool reachedMinRecoveryPoint = false;
|
||||
ErrorContextCallback errcontext;
|
||||
|
||||
/* use volatile pointer to prevent code rearrangement */
|
||||
volatile XLogCtlData *xlogctl = XLogCtl;
|
||||
|
||||
/* initialize shared replayEndRecPtr and recoveryLastRecPtr */
|
||||
SpinLockAcquire(&xlogctl->info_lck);
|
||||
xlogctl->replayEndRecPtr = ReadRecPtr;
|
||||
xlogctl->recoveryLastRecPtr = ReadRecPtr;
|
||||
SpinLockRelease(&xlogctl->info_lck);
|
||||
|
||||
InRedo = true;
|
||||
|
||||
ereport(LOG,
|
||||
(errmsg("redo starts at %X/%X",
|
||||
ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
|
||||
|
||||
/*
|
||||
* Let postmaster know we've started redo now, so that it can
|
||||
* launch bgwriter to perform restartpoints. We don't bother
|
||||
* during crash recovery as restartpoints can only be performed
|
||||
* during archive recovery. And we'd like to keep crash recovery
|
||||
* simple, to avoid introducing bugs that could you from
|
||||
* recovering after crash.
|
||||
*
|
||||
* After this point, we can no longer assume that we're the only
|
||||
* process in addition to postmaster! Also, fsync requests are
|
||||
* subsequently to be handled by the bgwriter, not locally.
|
||||
*/
|
||||
if (InArchiveRecovery && IsUnderPostmaster)
|
||||
{
|
||||
SetForwardFsyncRequests();
|
||||
SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
|
||||
bgwriterLaunched = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* main redo apply loop
|
||||
*/
|
||||
@ -6024,32 +6065,8 @@ StartupXLOG(void)
|
||||
/* Handle interrupt signals of startup process */
|
||||
HandleStartupProcInterrupts();
|
||||
|
||||
/*
|
||||
* Have we passed our safe starting point?
|
||||
*/
|
||||
if (!reachedMinRecoveryPoint &&
|
||||
XLByteLE(minRecoveryPoint, EndRecPtr) &&
|
||||
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
|
||||
{
|
||||
reachedMinRecoveryPoint = true;
|
||||
ereport(LOG,
|
||||
(errmsg("consistent recovery state reached at %X/%X",
|
||||
EndRecPtr.xlogid, EndRecPtr.xrecoff)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Have we got a valid starting snapshot that will allow
|
||||
* queries to be run? If so, we can tell postmaster that the
|
||||
* database is consistent now, enabling connections.
|
||||
*/
|
||||
if (standbyState == STANDBY_SNAPSHOT_READY &&
|
||||
!backendsAllowed &&
|
||||
reachedMinRecoveryPoint &&
|
||||
IsUnderPostmaster)
|
||||
{
|
||||
backendsAllowed = true;
|
||||
SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
|
||||
}
|
||||
/* Allow read-only connections if we're consistent now */
|
||||
CheckRecoveryConsistency();
|
||||
|
||||
/*
|
||||
* Have we reached our recovery target?
|
||||
@ -6398,6 +6415,44 @@ StartupXLOG(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks if recovery has reached a consistent state. When consistency is
|
||||
* reached and we have a valid starting standby snapshot, tell postmaster
|
||||
* that it can start accepting read-only connections.
|
||||
*/
|
||||
static void
|
||||
CheckRecoveryConsistency(void)
|
||||
{
|
||||
static bool backendsAllowed = false;
|
||||
|
||||
/*
|
||||
* Have we passed our safe starting point?
|
||||
*/
|
||||
if (!reachedMinRecoveryPoint &&
|
||||
XLByteLE(minRecoveryPoint, EndRecPtr) &&
|
||||
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
|
||||
{
|
||||
reachedMinRecoveryPoint = true;
|
||||
ereport(LOG,
|
||||
(errmsg("consistent recovery state reached at %X/%X",
|
||||
EndRecPtr.xlogid, EndRecPtr.xrecoff)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Have we got a valid starting snapshot that will allow
|
||||
* queries to be run? If so, we can tell postmaster that the
|
||||
* database is consistent now, enabling connections.
|
||||
*/
|
||||
if (standbyState == STANDBY_SNAPSHOT_READY &&
|
||||
!backendsAllowed &&
|
||||
reachedMinRecoveryPoint &&
|
||||
IsUnderPostmaster)
|
||||
{
|
||||
backendsAllowed = true;
|
||||
SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Is the system still in recovery?
|
||||
*
|
||||
@ -7657,13 +7712,36 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
if (standbyState != STANDBY_DISABLED)
|
||||
CheckRequiredParameterValues(checkPoint);
|
||||
|
||||
/*
|
||||
* If we see a shutdown checkpoint, we know that nothing was
|
||||
* running on the master at this point. So fake-up an empty
|
||||
* running-xacts record and use that here and now. Recover
|
||||
* additional standby state for prepared transactions.
|
||||
*/
|
||||
if (standbyState >= STANDBY_INITIALIZED)
|
||||
{
|
||||
TransactionId *xids;
|
||||
int nxids;
|
||||
TransactionId oldestActiveXID;
|
||||
RunningTransactionsData running;
|
||||
|
||||
oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
|
||||
|
||||
/*
|
||||
* Remove stale transactions, if any.
|
||||
* Construct a RunningTransactions snapshot representing a shut
|
||||
* down server, with only prepared transactions still alive.
|
||||
* We're never overflowed at this point because all subxids
|
||||
* are listed with their parent prepared transactions.
|
||||
*/
|
||||
ExpireOldKnownAssignedTransactionIds(checkPoint.nextXid);
|
||||
StandbyReleaseOldLocks(checkPoint.nextXid);
|
||||
running.xcnt = nxids;
|
||||
running.subxid_overflow = false;
|
||||
running.nextXid = checkPoint.nextXid;
|
||||
running.oldestRunningXid = oldestActiveXID;
|
||||
running.xids = xids;
|
||||
|
||||
ProcArrayApplyRecoveryInfo(&running);
|
||||
|
||||
StandbyRecoverPreparedTransactions(true);
|
||||
}
|
||||
|
||||
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/access/twophase.h,v 1.14 2010/01/02 16:58:00 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/access/twophase.h,v 1.15 2010/04/13 14:17:46 heikki Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -44,6 +44,7 @@ extern bool StandbyTransactionIdIsPrepared(TransactionId xid);
|
||||
|
||||
extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p,
|
||||
int *nxids_p);
|
||||
extern void StandbyRecoverPreparedTransactions(bool overwriteOK);
|
||||
extern void RecoverPreparedTransactions(void);
|
||||
|
||||
extern void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);
|
||||
|
Loading…
Reference in New Issue
Block a user