diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml index fa3a701631..39bb25c8e2 100644 --- a/doc/src/sgml/backup.sgml +++ b/doc/src/sgml/backup.sgml @@ -568,7 +568,7 @@ tar -cf backup.tar /usr/local/pgsql/data normally creates just a few segment files and then recycles them by renaming no-longer-needed segment files to higher segment numbers. It's assumed that segment files whose - contents precede the checkpoint-before-last are no longer of + contents precede the last checkpoint are no longer of interest and can be recycled. diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index e571292bf4..f901567f7e 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -17948,11 +17948,6 @@ SELECT collation for ('foo' COLLATE "de_DE"); pg_lsn - - prior_lsn - pg_lsn - - redo_lsn pg_lsn diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index dd028a12a4..84f662c824 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2221,13 +2221,18 @@ CalculateCheckpointSegments(void) * Calculate the distance at which to trigger a checkpoint, to avoid * exceeding max_wal_size_mb. This is based on two assumptions: * - * a) we keep WAL for two checkpoint cycles, back to the "prev" checkpoint. + * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept + * WAL for two checkpoint cycles to allow us to recover from the + * secondary checkpoint if the first checkpoint failed, though we + * only did this on the master anyway, not on standby. Keeping just + * one checkpoint simplifies processing and reduces disk space in + * many smaller databases.) * b) during checkpoint, we consume checkpoint_completion_target * * number of segments consumed between checkpoints. 
*------- */ target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) / - (2.0 + CheckPointCompletionTarget); + (1.0 + CheckPointCompletionTarget); /* round down */ CheckPointSegments = (int) target; @@ -2279,23 +2284,8 @@ XLOGfileslop(XLogRecPtr PriorRedoPtr) * To estimate where the next checkpoint will finish, assume that the * system runs steadily consuming CheckPointDistanceEstimate bytes between * every checkpoint. - * - * The reason this calculation is done from the prior checkpoint, not the - * one that just finished, is that this behaves better if some checkpoint - * cycles are abnormally short, like if you perform a manual checkpoint - * right after a timed one. The manual checkpoint will make almost a full - * cycle's worth of WAL segments available for recycling, because the - * segments from the prior's prior, fully-sized checkpoint cycle are no - * longer needed. However, the next checkpoint will make only few segments - * available for recycling, the ones generated between the timed - * checkpoint and the manual one right after that. If at the manual - * checkpoint we only retained enough segments to get us to the next timed - * one, and removed the rest, then at the next checkpoint we would not - * have enough segments around for recycling, to get us to the checkpoint - * after that. Basing the calculations on the distance from the prior redo - * pointer largely fixes that problem. */ - distance = (2.0 + CheckPointCompletionTarget) * CheckPointDistanceEstimate; + distance = (1.0 + CheckPointCompletionTarget) * CheckPointDistanceEstimate; /* add 10% for good measure. */ distance *= 1.10; @@ -6593,30 +6583,17 @@ StartupXLOG(void) (errmsg("checkpoint record is at %X/%X", (uint32) (checkPointLoc >> 32), (uint32) checkPointLoc))); } - else if (StandbyMode) + else { /* - * The last valid checkpoint record required for a streaming - * recovery exists in neither standby nor the primary. 
+ * We used to attempt to go back to a secondary checkpoint + * record here, but only when not in standby_mode. We now + * just fail if we can't read the last checkpoint because + * this allows us to simplify processing around checkpoints. */ ereport(PANIC, (errmsg("could not locate a valid checkpoint record"))); } - else - { - checkPointLoc = ControlFile->prevCheckPoint; - record = ReadCheckpointRecord(xlogreader, checkPointLoc, 2, true); - if (record != NULL) - { - ereport(LOG, - (errmsg("using previous checkpoint record at %X/%X", - (uint32) (checkPointLoc >> 32), (uint32) checkPointLoc))); - InRecovery = true; /* force recovery even if SHUTDOWNED */ - } - else - ereport(PANIC, - (errmsg("could not locate a valid checkpoint record"))); - } memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint)); wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN); } @@ -6845,7 +6822,6 @@ StartupXLOG(void) recoveryTargetTLI))); ControlFile->state = DB_IN_CRASH_RECOVERY; } - ControlFile->prevCheckPoint = ControlFile->checkPoint; ControlFile->checkPoint = checkPointLoc; ControlFile->checkPointCopy = checkPoint; if (InArchiveRecovery) @@ -7619,12 +7595,11 @@ StartupXLOG(void) { if (fast_promote) { - checkPointLoc = ControlFile->prevCheckPoint; + checkPointLoc = ControlFile->checkPoint; /* * Confirm the last checkpoint is available for us to recover - * from if we fail. Note that we don't check for the secondary - * checkpoint since that isn't available in most base backups. + * from if we fail. */ record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, false); if (record != NULL) @@ -8090,7 +8065,7 @@ LocalSetXLogInsertAllowed(void) * Subroutine to try to fetch and validate a prior checkpoint record. * * whichChkpt identifies the checkpoint (merely for reporting purposes). 
- * 1 for "primary", 2 for "secondary", 0 for "other" (backup_label) + * 1 for "primary", 0 for "other" (backup_label) */ static XLogRecord * ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, @@ -8110,10 +8085,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, ereport(LOG, (errmsg("invalid primary checkpoint link in control file"))); break; - case 2: - ereport(LOG, - (errmsg("invalid secondary checkpoint link in control file"))); - break; default: ereport(LOG, (errmsg("invalid checkpoint link in backup_label file"))); @@ -8135,10 +8106,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, ereport(LOG, (errmsg("invalid primary checkpoint record"))); break; - case 2: - ereport(LOG, - (errmsg("invalid secondary checkpoint record"))); - break; default: ereport(LOG, (errmsg("invalid checkpoint record"))); @@ -8154,10 +8121,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, ereport(LOG, (errmsg("invalid resource manager ID in primary checkpoint record"))); break; - case 2: - ereport(LOG, - (errmsg("invalid resource manager ID in secondary checkpoint record"))); - break; default: ereport(LOG, (errmsg("invalid resource manager ID in checkpoint record"))); @@ -8175,10 +8138,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, ereport(LOG, (errmsg("invalid xl_info in primary checkpoint record"))); break; - case 2: - ereport(LOG, - (errmsg("invalid xl_info in secondary checkpoint record"))); - break; default: ereport(LOG, (errmsg("invalid xl_info in checkpoint record"))); @@ -8194,10 +8153,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, ereport(LOG, (errmsg("invalid length of primary checkpoint record"))); break; - case 2: - ereport(LOG, - (errmsg("invalid length of secondary checkpoint record"))); - break; default: ereport(LOG, (errmsg("invalid length of checkpoint record"))); @@ -8933,8 +8888,7 @@ CreateCheckPoint(int flags) (errmsg("concurrent 
write-ahead log activity while database system is shutting down"))); /* - * Remember the prior checkpoint's redo pointer, used later to determine - * the point where the log can be truncated. + * Remember the prior checkpoint's redo ptr for UpdateCheckPointDistanceEstimate() */ PriorRedoPtr = ControlFile->checkPointCopy.redo; @@ -8944,7 +8898,6 @@ CreateCheckPoint(int flags) LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); if (shutdown) ControlFile->state = DB_SHUTDOWNED; - ControlFile->prevCheckPoint = ControlFile->checkPoint; ControlFile->checkPoint = ProcLastRecPtr; ControlFile->checkPointCopy = checkPoint; ControlFile->time = (pg_time_t) time(NULL); @@ -8982,8 +8935,7 @@ CreateCheckPoint(int flags) smgrpostckpt(); /* - * Delete old log files (those no longer needed even for previous - * checkpoint or the standbys in XLOG streaming). + * Delete or recycle old log files that are no longer needed */ if (PriorRedoPtr != InvalidXLogRecPtr) { @@ -8992,7 +8944,8 @@ CreateCheckPoint(int flags) /* Update the average distance between checkpoints. */ UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr); - XLByteToSeg(PriorRedoPtr, _logSegNo, wal_segment_size); + /* Trim from the last checkpoint, not the last - 1 */ + XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size); KeepLogSeg(recptr, &_logSegNo); _logSegNo--; RemoveOldXlogFiles(_logSegNo, PriorRedoPtr, recptr); @@ -9258,8 +9211,7 @@ CreateRestartPoint(int flags) CheckPointGuts(lastCheckPoint.redo, flags); /* - * Remember the prior checkpoint's redo pointer, used later to determine - * the point at which we can truncate the log. 
+ * Remember the prior checkpoint's redo ptr for UpdateCheckPointDistanceEstimate() */ PriorRedoPtr = ControlFile->checkPointCopy.redo; @@ -9273,7 +9225,6 @@ CreateRestartPoint(int flags) if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY && ControlFile->checkPointCopy.redo < lastCheckPoint.redo) { - ControlFile->prevCheckPoint = ControlFile->checkPoint; ControlFile->checkPoint = lastCheckPointRecPtr; ControlFile->checkPointCopy = lastCheckPoint; ControlFile->time = (pg_time_t) time(NULL); diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index bc2ca8731d..1b5086a45d 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -93,41 +93,39 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) tupdesc = CreateTemplateTupleDesc(19, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "checkpoint_lsn", LSNOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 2, "prior_lsn", + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "redo_lsn", LSNOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 3, "redo_lsn", - LSNOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 4, "redo_wal_file", + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "redo_wal_file", TEXTOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 5, "timeline_id", + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "timeline_id", INT4OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 6, "prev_timeline_id", + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "prev_timeline_id", INT4OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 7, "full_page_writes", + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "full_page_writes", BOOLOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 8, "next_xid", + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "next_xid", TEXTOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 9, "next_oid", + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "next_oid", OIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 
10, "next_multixact_id", + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "next_multixact_id", XIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 11, "next_multi_offset", + TupleDescInitEntry(tupdesc, (AttrNumber) 10, "next_multi_offset", XIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 12, "oldest_xid", + TupleDescInitEntry(tupdesc, (AttrNumber) 11, "oldest_xid", XIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 13, "oldest_xid_dbid", + TupleDescInitEntry(tupdesc, (AttrNumber) 12, "oldest_xid_dbid", OIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 14, "oldest_active_xid", + TupleDescInitEntry(tupdesc, (AttrNumber) 13, "oldest_active_xid", XIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 15, "oldest_multi_xid", + TupleDescInitEntry(tupdesc, (AttrNumber) 14, "oldest_multi_xid", XIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 16, "oldest_multi_dbid", + TupleDescInitEntry(tupdesc, (AttrNumber) 15, "oldest_multi_dbid", OIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 17, "oldest_commit_ts_xid", + TupleDescInitEntry(tupdesc, (AttrNumber) 16, "oldest_commit_ts_xid", XIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 18, "newest_commit_ts_xid", + TupleDescInitEntry(tupdesc, (AttrNumber) 17, "newest_commit_ts_xid", XIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 19, "checkpoint_time", + TupleDescInitEntry(tupdesc, (AttrNumber) 18, "checkpoint_time", TIMESTAMPTZOID, -1, 0); tupdesc = BlessTupleDesc(tupdesc); @@ -149,62 +147,59 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) values[0] = LSNGetDatum(ControlFile->checkPoint); nulls[0] = false; - values[1] = LSNGetDatum(ControlFile->prevCheckPoint); + values[1] = LSNGetDatum(ControlFile->checkPointCopy.redo); nulls[1] = false; - values[2] = LSNGetDatum(ControlFile->checkPointCopy.redo); + values[2] = CStringGetTextDatum(xlogfilename); nulls[2] = false; - values[3] = CStringGetTextDatum(xlogfilename); + values[3] = 
Int32GetDatum(ControlFile->checkPointCopy.ThisTimeLineID); nulls[3] = false; - values[4] = Int32GetDatum(ControlFile->checkPointCopy.ThisTimeLineID); + values[4] = Int32GetDatum(ControlFile->checkPointCopy.PrevTimeLineID); nulls[4] = false; - values[5] = Int32GetDatum(ControlFile->checkPointCopy.PrevTimeLineID); + values[5] = BoolGetDatum(ControlFile->checkPointCopy.fullPageWrites); nulls[5] = false; - values[6] = BoolGetDatum(ControlFile->checkPointCopy.fullPageWrites); - nulls[6] = false; - - values[7] = CStringGetTextDatum(psprintf("%u:%u", + values[6] = CStringGetTextDatum(psprintf("%u:%u", ControlFile->checkPointCopy.nextXidEpoch, ControlFile->checkPointCopy.nextXid)); + nulls[6] = false; + + values[7] = ObjectIdGetDatum(ControlFile->checkPointCopy.nextOid); nulls[7] = false; - values[8] = ObjectIdGetDatum(ControlFile->checkPointCopy.nextOid); + values[8] = TransactionIdGetDatum(ControlFile->checkPointCopy.nextMulti); nulls[8] = false; - values[9] = TransactionIdGetDatum(ControlFile->checkPointCopy.nextMulti); + values[9] = TransactionIdGetDatum(ControlFile->checkPointCopy.nextMultiOffset); nulls[9] = false; - values[10] = TransactionIdGetDatum(ControlFile->checkPointCopy.nextMultiOffset); + values[10] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestXid); nulls[10] = false; - values[11] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestXid); + values[11] = ObjectIdGetDatum(ControlFile->checkPointCopy.oldestXidDB); nulls[11] = false; - values[12] = ObjectIdGetDatum(ControlFile->checkPointCopy.oldestXidDB); + values[12] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestActiveXid); nulls[12] = false; - values[13] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestActiveXid); + values[13] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestMulti); nulls[13] = false; - values[14] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestMulti); + values[14] = ObjectIdGetDatum(ControlFile->checkPointCopy.oldestMultiDB); 
nulls[14] = false; - values[15] = ObjectIdGetDatum(ControlFile->checkPointCopy.oldestMultiDB); + values[15] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestCommitTsXid); nulls[15] = false; - values[16] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestCommitTsXid); + values[16] = TransactionIdGetDatum(ControlFile->checkPointCopy.newestCommitTsXid); nulls[16] = false; - values[17] = TransactionIdGetDatum(ControlFile->checkPointCopy.newestCommitTsXid); - nulls[17] = false; - - values[18] = TimestampTzGetDatum( + values[17] = TimestampTzGetDatum( time_t_to_timestamptz(ControlFile->checkPointCopy.time)); - nulls[18] = false; + nulls[17] = false; htup = heap_form_tuple(tupdesc, values, nulls); diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 8cc4fb0341..cc73b7d6c2 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -222,9 +222,6 @@ main(int argc, char *argv[]) printf(_("Latest checkpoint location: %X/%X\n"), (uint32) (ControlFile->checkPoint >> 32), (uint32) ControlFile->checkPoint); - printf(_("Prior checkpoint location: %X/%X\n"), - (uint32) (ControlFile->prevCheckPoint >> 32), - (uint32) ControlFile->prevCheckPoint); printf(_("Latest checkpoint's REDO location: %X/%X\n"), (uint32) (ControlFile->checkPointCopy.redo >> 32), (uint32) ControlFile->checkPointCopy.redo); diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index 25d5547b36..9f93385f44 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -876,7 +876,6 @@ RewriteControlFile(void) ControlFile.state = DB_SHUTDOWNED; ControlFile.time = (pg_time_t) time(NULL); ControlFile.checkPoint = ControlFile.checkPointCopy.redo; - ControlFile.prevCheckPoint = 0; ControlFile.minRecoveryPoint = 0; ControlFile.minRecoveryPointTLI = 0; ControlFile.backupStartPoint = 0; diff --git a/src/include/catalog/pg_control.h 
b/src/include/catalog/pg_control.h index 3fed3b6431..9e9e01427e 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -21,7 +21,7 @@ /* Version identifier for this pg_control format */ -#define PG_CONTROL_VERSION 1003 +#define PG_CONTROL_VERSION 1100 /* Nonce key length, see below */ #define MOCK_AUTH_NONCE_LEN 32 @@ -127,7 +127,6 @@ typedef struct ControlFileData DBState state; /* see enum above */ pg_time_t time; /* time stamp of last pg_control update */ XLogRecPtr checkPoint; /* last check point record ptr */ - XLogRecPtr prevCheckPoint; /* previous check point record ptr */ CheckPoint checkPointCopy; /* copy of last check point record */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 93c031aad7..5e3e7228d6 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -5500,7 +5500,7 @@ DESCR("pg_config binary as a function"); DATA(insert OID = 3441 ( pg_control_system PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,20,1184}" "{o,o,o,o}" "{pg_control_version,catalog_version_no,system_identifier,pg_control_last_modified}" _null_ _null_ pg_control_system _null_ _null_ _null_ )); DESCR("pg_controldata general state information as a function"); -DATA(insert OID = 3442 ( pg_control_checkpoint PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{3220,3220,3220,25,23,23,16,25,26,28,28,28,26,28,28,26,28,28,1184}" "{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}" "{checkpoint_lsn,prior_lsn,redo_lsn,redo_wal_file,timeline_id,prev_timeline_id,full_page_writes,next_xid,next_oid,next_multixact_id,next_multi_offset,oldest_xid,oldest_xid_dbid,oldest_active_xid,oldest_multi_xid,oldest_multi_dbid,oldest_commit_ts_xid,newest_commit_ts_xid,checkpoint_time}" _null_ _null_ pg_control_checkpoint _null_ _null_ _null_ )); +DATA(insert OID = 3442 ( pg_control_checkpoint PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{3220,3220,25,23,23,16,25,26,28,28,28,26,28,28,26,28,28,1184}" 
"{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}" "{checkpoint_lsn,redo_lsn,redo_wal_file,timeline_id,prev_timeline_id,full_page_writes,next_xid,next_oid,next_multixact_id,next_multi_offset,oldest_xid,oldest_xid_dbid,oldest_active_xid,oldest_multi_xid,oldest_multi_dbid,oldest_commit_ts_xid,newest_commit_ts_xid,checkpoint_time}" _null_ _null_ pg_control_checkpoint _null_ _null_ _null_ )); DESCR("pg_controldata checkpoint state information as a function"); DATA(insert OID = 3443 ( pg_control_recovery PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{3220,23,3220,3220,16}" "{o,o,o,o,o}" "{min_recovery_end_lsn,min_recovery_end_timeline,backup_start_lsn,backup_end_lsn,end_of_backup_record_required}" _null_ _null_ pg_control_recovery _null_ _null_ _null_ ));