mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-27 08:39:28 +08:00
Allow Hot Standby to begin from a shutdown checkpoint.
Patch by Simon Riggs & me
This commit is contained in:
parent
ea9c103237
commit
361bd1662e
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.59 2010/02/26 02:00:34 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.60 2010/04/13 14:17:46 heikki Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* Each global transaction is associated with a global transaction
|
* Each global transaction is associated with a global transaction
|
||||||
@ -1718,6 +1718,89 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* StandbyRecoverPreparedTransactions
|
||||||
|
*
|
||||||
|
* Scan the pg_twophase directory and setup all the required information to
|
||||||
|
* allow standby queries to treat prepared transactions as still active.
|
||||||
|
* This is never called at the end of recovery - we use
|
||||||
|
* RecoverPreparedTransactions() at that point.
|
||||||
|
*
|
||||||
|
* Currently we simply call SubTransSetParent() for any subxids of prepared
|
||||||
|
* transactions. If overwriteOK is true, it's OK if some XIDs have already
|
||||||
|
* been marked in pg_subtrans.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
StandbyRecoverPreparedTransactions(bool overwriteOK)
|
||||||
|
{
|
||||||
|
DIR *cldir;
|
||||||
|
struct dirent *clde;
|
||||||
|
|
||||||
|
cldir = AllocateDir(TWOPHASE_DIR);
|
||||||
|
while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
|
||||||
|
{
|
||||||
|
if (strlen(clde->d_name) == 8 &&
|
||||||
|
strspn(clde->d_name, "0123456789ABCDEF") == 8)
|
||||||
|
{
|
||||||
|
TransactionId xid;
|
||||||
|
char *buf;
|
||||||
|
TwoPhaseFileHeader *hdr;
|
||||||
|
TransactionId *subxids;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
|
||||||
|
|
||||||
|
/* Already processed? */
|
||||||
|
if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
|
||||||
|
{
|
||||||
|
ereport(WARNING,
|
||||||
|
(errmsg("removing stale two-phase state file \"%s\"",
|
||||||
|
clde->d_name)));
|
||||||
|
RemoveTwoPhaseFile(xid, true);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read and validate file */
|
||||||
|
buf = ReadTwoPhaseFile(xid, true);
|
||||||
|
if (buf == NULL)
|
||||||
|
{
|
||||||
|
ereport(WARNING,
|
||||||
|
(errmsg("removing corrupt two-phase state file \"%s\"",
|
||||||
|
clde->d_name)));
|
||||||
|
RemoveTwoPhaseFile(xid, true);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Deconstruct header */
|
||||||
|
hdr = (TwoPhaseFileHeader *) buf;
|
||||||
|
if (!TransactionIdEquals(hdr->xid, xid))
|
||||||
|
{
|
||||||
|
ereport(WARNING,
|
||||||
|
(errmsg("removing corrupt two-phase state file \"%s\"",
|
||||||
|
clde->d_name)));
|
||||||
|
RemoveTwoPhaseFile(xid, true);
|
||||||
|
pfree(buf);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Examine subtransaction XIDs ... they should all follow main
|
||||||
|
* XID.
|
||||||
|
*/
|
||||||
|
subxids = (TransactionId *)
|
||||||
|
(buf + MAXALIGN(sizeof(TwoPhaseFileHeader)));
|
||||||
|
for (i = 0; i < hdr->nsubxacts; i++)
|
||||||
|
{
|
||||||
|
TransactionId subxid = subxids[i];
|
||||||
|
|
||||||
|
Assert(TransactionIdFollows(subxid, xid));
|
||||||
|
SubTransSetParent(xid, subxid, overwriteOK);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FreeDir(cldir);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* RecoverPreparedTransactions
|
* RecoverPreparedTransactions
|
||||||
*
|
*
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.393 2010/04/12 10:40:42 heikki Exp $
|
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.394 2010/04/13 14:17:46 heikki Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -496,6 +496,7 @@ static TimeLineID lastPageTLI = 0;
|
|||||||
static XLogRecPtr minRecoveryPoint; /* local copy of
|
static XLogRecPtr minRecoveryPoint; /* local copy of
|
||||||
* ControlFile->minRecoveryPoint */
|
* ControlFile->minRecoveryPoint */
|
||||||
static bool updateMinRecoveryPoint = true;
|
static bool updateMinRecoveryPoint = true;
|
||||||
|
static bool reachedMinRecoveryPoint = false;
|
||||||
|
|
||||||
static bool InRedo = false;
|
static bool InRedo = false;
|
||||||
|
|
||||||
@ -551,6 +552,7 @@ static void ValidateXLOGDirectoryStructure(void);
|
|||||||
static void CleanupBackupHistory(void);
|
static void CleanupBackupHistory(void);
|
||||||
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
|
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
|
||||||
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt);
|
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt);
|
||||||
|
static void CheckRecoveryConsistency(void);
|
||||||
static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
|
static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
|
||||||
static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
|
static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
|
||||||
static List *readTimeLineHistory(TimeLineID targetTLI);
|
static List *readTimeLineHistory(TimeLineID targetTLI);
|
||||||
@ -5591,7 +5593,6 @@ StartupXLOG(void)
|
|||||||
uint32 freespace;
|
uint32 freespace;
|
||||||
TransactionId oldestActiveXID;
|
TransactionId oldestActiveXID;
|
||||||
bool bgwriterLaunched = false;
|
bool bgwriterLaunched = false;
|
||||||
bool backendsAllowed = false;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read control file and check XLOG status looks valid.
|
* Read control file and check XLOG status looks valid.
|
||||||
@ -5838,6 +5839,8 @@ StartupXLOG(void)
|
|||||||
if (InRecovery)
|
if (InRecovery)
|
||||||
{
|
{
|
||||||
int rmid;
|
int rmid;
|
||||||
|
/* use volatile pointer to prevent code rearrangement */
|
||||||
|
volatile XLogCtlData *xlogctl = XLogCtl;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Update pg_control to show that we are recovering and to show the
|
* Update pg_control to show that we are recovering and to show the
|
||||||
@ -5930,6 +5933,33 @@ StartupXLOG(void)
|
|||||||
StartupMultiXact();
|
StartupMultiXact();
|
||||||
|
|
||||||
ProcArrayInitRecoveryInfo(oldestActiveXID);
|
ProcArrayInitRecoveryInfo(oldestActiveXID);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we're beginning at a shutdown checkpoint, we know that
|
||||||
|
* nothing was running on the master at this point. So fake-up
|
||||||
|
* an empty running-xacts record and use that here and now.
|
||||||
|
* Recover additional standby state for prepared transactions.
|
||||||
|
*/
|
||||||
|
if (wasShutdown)
|
||||||
|
{
|
||||||
|
RunningTransactionsData running;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Construct a RunningTransactions snapshot representing a shut
|
||||||
|
* down server, with only prepared transactions still alive.
|
||||||
|
* We're never overflowed at this point because all subxids
|
||||||
|
* are listed with their parent prepared transactions.
|
||||||
|
*/
|
||||||
|
running.xcnt = nxids;
|
||||||
|
running.subxid_overflow = false;
|
||||||
|
running.nextXid = checkPoint.nextXid;
|
||||||
|
running.oldestRunningXid = oldestActiveXID;
|
||||||
|
running.xids = xids;
|
||||||
|
|
||||||
|
ProcArrayApplyRecoveryInfo(&running);
|
||||||
|
|
||||||
|
StandbyRecoverPreparedTransactions(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Initialize resource managers */
|
/* Initialize resource managers */
|
||||||
@ -5939,6 +5969,46 @@ StartupXLOG(void)
|
|||||||
RmgrTable[rmid].rm_startup();
|
RmgrTable[rmid].rm_startup();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize shared replayEndRecPtr and recoveryLastRecPtr.
|
||||||
|
*
|
||||||
|
* This is slightly confusing if we're starting from an online
|
||||||
|
* checkpoint; we've just read and replayed the checkpoint record,
|
||||||
|
* but we're going to start replay from its redo pointer, which
|
||||||
|
* precedes the location of the checkpoint record itself. So even
|
||||||
|
* though the last record we've replayed is indeed ReadRecPtr, we
|
||||||
|
* haven't replayed all the preceding records yet. That's OK for
|
||||||
|
* the current use of these variables.
|
||||||
|
*/
|
||||||
|
SpinLockAcquire(&xlogctl->info_lck);
|
||||||
|
xlogctl->replayEndRecPtr = ReadRecPtr;
|
||||||
|
xlogctl->recoveryLastRecPtr = ReadRecPtr;
|
||||||
|
SpinLockRelease(&xlogctl->info_lck);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Let postmaster know we've started redo now, so that it can
|
||||||
|
* launch bgwriter to perform restartpoints. We don't bother
|
||||||
|
* during crash recovery as restartpoints can only be performed
|
||||||
|
* during archive recovery. And we'd like to keep crash recovery
|
||||||
|
* simple, to avoid introducing bugs that could prevent you from
|
||||||
|
* recovering after crash.
|
||||||
|
*
|
||||||
|
* After this point, we can no longer assume that we're the only
|
||||||
|
* process in addition to postmaster! Also, fsync requests are
|
||||||
|
* subsequently to be handled by the bgwriter, not locally.
|
||||||
|
*/
|
||||||
|
if (InArchiveRecovery && IsUnderPostmaster)
|
||||||
|
{
|
||||||
|
SetForwardFsyncRequests();
|
||||||
|
SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
|
||||||
|
bgwriterLaunched = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allow read-only connections immediately if we're consistent already.
|
||||||
|
*/
|
||||||
|
CheckRecoveryConsistency();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find the first record that logically follows the checkpoint --- it
|
* Find the first record that logically follows the checkpoint --- it
|
||||||
* might physically precede it, though.
|
* might physically precede it, though.
|
||||||
@ -5958,43 +6028,14 @@ StartupXLOG(void)
|
|||||||
{
|
{
|
||||||
bool recoveryContinue = true;
|
bool recoveryContinue = true;
|
||||||
bool recoveryApply = true;
|
bool recoveryApply = true;
|
||||||
bool reachedMinRecoveryPoint = false;
|
|
||||||
ErrorContextCallback errcontext;
|
ErrorContextCallback errcontext;
|
||||||
|
|
||||||
/* use volatile pointer to prevent code rearrangement */
|
|
||||||
volatile XLogCtlData *xlogctl = XLogCtl;
|
|
||||||
|
|
||||||
/* initialize shared replayEndRecPtr and recoveryLastRecPtr */
|
|
||||||
SpinLockAcquire(&xlogctl->info_lck);
|
|
||||||
xlogctl->replayEndRecPtr = ReadRecPtr;
|
|
||||||
xlogctl->recoveryLastRecPtr = ReadRecPtr;
|
|
||||||
SpinLockRelease(&xlogctl->info_lck);
|
|
||||||
|
|
||||||
InRedo = true;
|
InRedo = true;
|
||||||
|
|
||||||
ereport(LOG,
|
ereport(LOG,
|
||||||
(errmsg("redo starts at %X/%X",
|
(errmsg("redo starts at %X/%X",
|
||||||
ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
|
ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
|
||||||
|
|
||||||
/*
|
|
||||||
* Let postmaster know we've started redo now, so that it can
|
|
||||||
* launch bgwriter to perform restartpoints. We don't bother
|
|
||||||
* during crash recovery as restartpoints can only be performed
|
|
||||||
* during archive recovery. And we'd like to keep crash recovery
|
|
||||||
* simple, to avoid introducing bugs that could you from
|
|
||||||
* recovering after crash.
|
|
||||||
*
|
|
||||||
* After this point, we can no longer assume that we're the only
|
|
||||||
* process in addition to postmaster! Also, fsync requests are
|
|
||||||
* subsequently to be handled by the bgwriter, not locally.
|
|
||||||
*/
|
|
||||||
if (InArchiveRecovery && IsUnderPostmaster)
|
|
||||||
{
|
|
||||||
SetForwardFsyncRequests();
|
|
||||||
SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
|
|
||||||
bgwriterLaunched = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* main redo apply loop
|
* main redo apply loop
|
||||||
*/
|
*/
|
||||||
@ -6024,32 +6065,8 @@ StartupXLOG(void)
|
|||||||
/* Handle interrupt signals of startup process */
|
/* Handle interrupt signals of startup process */
|
||||||
HandleStartupProcInterrupts();
|
HandleStartupProcInterrupts();
|
||||||
|
|
||||||
/*
|
/* Allow read-only connections if we're consistent now */
|
||||||
* Have we passed our safe starting point?
|
CheckRecoveryConsistency();
|
||||||
*/
|
|
||||||
if (!reachedMinRecoveryPoint &&
|
|
||||||
XLByteLE(minRecoveryPoint, EndRecPtr) &&
|
|
||||||
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
|
|
||||||
{
|
|
||||||
reachedMinRecoveryPoint = true;
|
|
||||||
ereport(LOG,
|
|
||||||
(errmsg("consistent recovery state reached at %X/%X",
|
|
||||||
EndRecPtr.xlogid, EndRecPtr.xrecoff)));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Have we got a valid starting snapshot that will allow
|
|
||||||
* queries to be run? If so, we can tell postmaster that the
|
|
||||||
* database is consistent now, enabling connections.
|
|
||||||
*/
|
|
||||||
if (standbyState == STANDBY_SNAPSHOT_READY &&
|
|
||||||
!backendsAllowed &&
|
|
||||||
reachedMinRecoveryPoint &&
|
|
||||||
IsUnderPostmaster)
|
|
||||||
{
|
|
||||||
backendsAllowed = true;
|
|
||||||
SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Have we reached our recovery target?
|
* Have we reached our recovery target?
|
||||||
@ -6398,6 +6415,44 @@ StartupXLOG(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Checks if recovery has reached a consistent state. When consistency is
|
||||||
|
* reached and we have a valid starting standby snapshot, tell postmaster
|
||||||
|
* that it can start accepting read-only connections.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
CheckRecoveryConsistency(void)
|
||||||
|
{
|
||||||
|
static bool backendsAllowed = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Have we passed our safe starting point?
|
||||||
|
*/
|
||||||
|
if (!reachedMinRecoveryPoint &&
|
||||||
|
XLByteLE(minRecoveryPoint, EndRecPtr) &&
|
||||||
|
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
|
||||||
|
{
|
||||||
|
reachedMinRecoveryPoint = true;
|
||||||
|
ereport(LOG,
|
||||||
|
(errmsg("consistent recovery state reached at %X/%X",
|
||||||
|
EndRecPtr.xlogid, EndRecPtr.xrecoff)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Have we got a valid starting snapshot that will allow
|
||||||
|
* queries to be run? If so, we can tell postmaster that the
|
||||||
|
* database is consistent now, enabling connections.
|
||||||
|
*/
|
||||||
|
if (standbyState == STANDBY_SNAPSHOT_READY &&
|
||||||
|
!backendsAllowed &&
|
||||||
|
reachedMinRecoveryPoint &&
|
||||||
|
IsUnderPostmaster)
|
||||||
|
{
|
||||||
|
backendsAllowed = true;
|
||||||
|
SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Is the system still in recovery?
|
* Is the system still in recovery?
|
||||||
*
|
*
|
||||||
@ -7657,13 +7712,36 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
|
|||||||
if (standbyState != STANDBY_DISABLED)
|
if (standbyState != STANDBY_DISABLED)
|
||||||
CheckRequiredParameterValues(checkPoint);
|
CheckRequiredParameterValues(checkPoint);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we see a shutdown checkpoint, we know that nothing was
|
||||||
|
* running on the master at this point. So fake-up an empty
|
||||||
|
* running-xacts record and use that here and now. Recover
|
||||||
|
* additional standby state for prepared transactions.
|
||||||
|
*/
|
||||||
if (standbyState >= STANDBY_INITIALIZED)
|
if (standbyState >= STANDBY_INITIALIZED)
|
||||||
{
|
{
|
||||||
|
TransactionId *xids;
|
||||||
|
int nxids;
|
||||||
|
TransactionId oldestActiveXID;
|
||||||
|
RunningTransactionsData running;
|
||||||
|
|
||||||
|
oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove stale transactions, if any.
|
* Construct a RunningTransactions snapshot representing a shut
|
||||||
|
* down server, with only prepared transactions still alive.
|
||||||
|
* We're never overflowed at this point because all subxids
|
||||||
|
* are listed with their parent prepared transactions.
|
||||||
*/
|
*/
|
||||||
ExpireOldKnownAssignedTransactionIds(checkPoint.nextXid);
|
running.xcnt = nxids;
|
||||||
StandbyReleaseOldLocks(checkPoint.nextXid);
|
running.subxid_overflow = false;
|
||||||
|
running.nextXid = checkPoint.nextXid;
|
||||||
|
running.oldestRunningXid = oldestActiveXID;
|
||||||
|
running.xids = xids;
|
||||||
|
|
||||||
|
ProcArrayApplyRecoveryInfo(&running);
|
||||||
|
|
||||||
|
StandbyRecoverPreparedTransactions(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
|
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/access/twophase.h,v 1.14 2010/01/02 16:58:00 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/access/twophase.h,v 1.15 2010/04/13 14:17:46 heikki Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -44,6 +44,7 @@ extern bool StandbyTransactionIdIsPrepared(TransactionId xid);
|
|||||||
|
|
||||||
extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p,
|
extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p,
|
||||||
int *nxids_p);
|
int *nxids_p);
|
||||||
|
extern void StandbyRecoverPreparedTransactions(bool overwriteOK);
|
||||||
extern void RecoverPreparedTransactions(void);
|
extern void RecoverPreparedTransactions(void);
|
||||||
|
|
||||||
extern void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);
|
extern void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);
|
||||||
|
Loading…
Reference in New Issue
Block a user