mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-03 08:00:21 +08:00
7d4d5c00f0
rather than deleting them only to have to create more. Steady state is 2*CHECKPOINT_SEGMENTS + WAL_FILES + 1 segment files, which will simply be renamed rather than constantly deleted and recreated. To make this safe, added current XLOG file/offset number to page header of XLOG pages, so that an un-overwritten page from an old incarnation of a logfile can be reliably told from a valid page. This change means that if you try to restart postmaster in a CVS-tip database after installing the change, you'll get a complaint about bad XLOG page magic number. If you don't want to initdb, run contrib/pg_resetxlog (and be sure you shut down the old postmaster cleanly).
1031 lines
28 KiB
C
1031 lines
28 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* pg_resetxlog.c
|
|
* A utility to "zero out" the xlog when it's corrupt beyond recovery.
|
|
* Can also rebuild pg_control if needed.
|
|
*
|
|
* The theory of operation is fairly simple:
|
|
* 1. Read the existing pg_control (which will include the last
|
|
* checkpoint record). If it is an old format then update to
|
|
* current format.
|
|
* 2. If pg_control is corrupt, attempt to intuit reasonable values,
|
|
* by scanning the old xlog if necessary.
|
|
* 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
|
|
* record at the start of xlog.
|
|
* 4. Flush the existing xlog files and write a new segment with
|
|
* just a checkpoint record in it. The new segment is positioned
|
|
* just past the end of the old xlog, so that existing LSNs in
|
|
* data pages will appear to be "in the past".
|
|
* This is all pretty straightforward except for the intuition part of
|
|
* step 2 ...
|
|
*
|
|
*
|
|
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* $Header: /cvsroot/pgsql/contrib/pg_resetxlog/Attic/pg_resetxlog.c,v 1.6 2001/07/19 02:12:34 tgl Exp $
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include <errno.h>
|
|
#include <unistd.h>
|
|
#include <time.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <fcntl.h>
|
|
#include <dirent.h>
|
|
#ifdef USE_LOCALE
|
|
#include <locale.h>
|
|
#endif
|
|
|
|
#include "access/xlog.h"
|
|
#include "catalog/catversion.h"
|
|
#include "catalog/pg_control.h"
|
|
|
|
|
|
/******************** stuff copied from xlog.c ********************/
|
|
|
|
/* Increment an xlogid/segment pair */
|
|
#define NextLogSeg(logId, logSeg) \
|
|
do { \
|
|
if ((logSeg) >= XLogSegsPerFile-1) \
|
|
{ \
|
|
(logId)++; \
|
|
(logSeg) = 0; \
|
|
} \
|
|
else \
|
|
(logSeg)++; \
|
|
} while (0)
|
|
|
|
/*
|
|
* Compute ID and segment from an XLogRecPtr.
|
|
*
|
|
* For XLByteToSeg, do the computation at face value. For XLByteToPrevSeg,
|
|
* a boundary byte is taken to be in the previous segment. This is suitable
|
|
* for deciding which segment to write given a pointer to a record end,
|
|
* for example.
|
|
*/
|
|
#define XLByteToSeg(xlrp, logId, logSeg) \
|
|
( logId = (xlrp).xlogid, \
|
|
logSeg = (xlrp).xrecoff / XLogSegSize \
|
|
)
|
|
#define XLByteToPrevSeg(xlrp, logId, logSeg) \
|
|
( logId = (xlrp).xlogid, \
|
|
logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \
|
|
)
|
|
|
|
/*
|
|
* Is an XLogRecPtr within a particular XLOG segment?
|
|
*
|
|
* For XLByteInSeg, do the computation at face value. For XLByteInPrevSeg,
|
|
* a boundary byte is taken to be in the previous segment.
|
|
*/
|
|
#define XLByteInSeg(xlrp, logId, logSeg) \
|
|
((xlrp).xlogid == (logId) && \
|
|
(xlrp).xrecoff / XLogSegSize == (logSeg))
|
|
|
|
#define XLByteInPrevSeg(xlrp, logId, logSeg) \
|
|
((xlrp).xlogid == (logId) && \
|
|
((xlrp).xrecoff - 1) / XLogSegSize == (logSeg))
|
|
|
|
|
|
#define XLogFileName(path, log, seg) \
|
|
snprintf(path, MAXPGPATH, "%s/%08X%08X", \
|
|
XLogDir, log, seg)
|
|
|
|
/*
|
|
* _INTL_MAXLOGRECSZ: max space needed for a record including header and
|
|
* any backup-block data.
|
|
*/
|
|
#define _INTL_MAXLOGRECSZ (SizeOfXLogRecord + MAXLOGRECSZ + \
|
|
XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
|
|
|
|
/******************** end of stuff copied from xlog.c ********************/
|
|
|
|
|
|
static char *DataDir; /* locations of important stuff */
|
|
static char XLogDir[MAXPGPATH];
|
|
static char ControlFilePath[MAXPGPATH];
|
|
|
|
static ControlFileData ControlFile; /* pg_control values */
|
|
static uint32 newXlogId,
|
|
newXlogSeg; /* ID/Segment of new XLOG segment */
|
|
static bool guessed = false; /* T if we had to guess at any values */
|
|
|
|
|
|
static bool CheckControlVersion0(char *buffer, int len);
|
|
|
|
|
|
static int
|
|
XLogFileOpen(uint32 log, uint32 seg)
|
|
{
|
|
char path[MAXPGPATH];
|
|
int fd;
|
|
|
|
XLogFileName(path, log, seg);
|
|
|
|
fd = open(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
|
|
return (fd);
|
|
}
|
|
|
|
|
|
/*
|
|
* Try to read the existing pg_control file.
|
|
*
|
|
* This routine is also responsible for updating old pg_control versions
|
|
* to the current format.
|
|
*/
|
|
static bool
|
|
ReadControlFile(void)
|
|
{
|
|
int fd;
|
|
int len;
|
|
char *buffer;
|
|
crc64 crc;
|
|
|
|
if ((fd = open(ControlFilePath, O_RDONLY)) < 0)
|
|
{
|
|
|
|
/*
|
|
* If pg_control is not there at all, or we can't read it, the
|
|
* odds are we've been handed a bad DataDir path, so give up. User
|
|
* can do "touch pg_control" to force us to proceed.
|
|
*/
|
|
perror("Failed to open $PGDATA/global/pg_control for reading");
|
|
if (errno == ENOENT)
|
|
fprintf(stderr, "If you're sure the PGDATA path is correct, do\n"
|
|
" touch %s\n"
|
|
"and try again.\n", ControlFilePath);
|
|
exit(1);
|
|
}
|
|
|
|
/* Use malloc to ensure we have a maxaligned buffer */
|
|
buffer = (char *) malloc(BLCKSZ);
|
|
|
|
len = read(fd, buffer, BLCKSZ);
|
|
if (len < 0)
|
|
{
|
|
perror("Failed to read $PGDATA/global/pg_control");
|
|
exit(1);
|
|
}
|
|
close(fd);
|
|
|
|
if (len >= sizeof(ControlFileData) &&
|
|
((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
|
|
{
|
|
/* Seems to be current version --- check the CRC. */
|
|
INIT_CRC64(crc);
|
|
COMP_CRC64(crc,
|
|
buffer + sizeof(crc64),
|
|
sizeof(ControlFileData) - sizeof(crc64));
|
|
FIN_CRC64(crc);
|
|
|
|
if (EQ_CRC64(crc, ((ControlFileData *) buffer)->crc))
|
|
{
|
|
/* Valid data... */
|
|
memcpy(&ControlFile, buffer, sizeof(ControlFile));
|
|
return true;
|
|
}
|
|
|
|
fprintf(stderr, "pg_control exists but has invalid CRC; proceed with caution.\n");
|
|
/* We will use the data anyway, but treat it as guessed. */
|
|
memcpy(&ControlFile, buffer, sizeof(ControlFile));
|
|
guessed = true;
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Maybe it's a 7.1beta pg_control.
|
|
*/
|
|
if (CheckControlVersion0(buffer, len))
|
|
return true;
|
|
|
|
/* Looks like it's a mess. */
|
|
fprintf(stderr, "pg_control exists but is broken or unknown version; ignoring it.\n");
|
|
return false;
|
|
}
|
|
|
|
|
|
/******************* routines for old XLOG format *******************/
|
|
|
|
|
|
/*
|
|
* This format was in use in 7.1 beta releases through 7.1beta5. The
|
|
* pg_control layout was different, and so were the XLOG page headers.
|
|
* The XLOG record header format was physically the same as 7.1 release,
|
|
* but interpretation of the xl_len field was not.
|
|
*/
|
|
|
|
typedef struct crc64V0
|
|
{
|
|
uint32 crc1;
|
|
uint32 crc2;
|
|
} crc64V0;
|
|
|
|
static uint32 crc_tableV0[] = {
|
|
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
|
|
0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
|
|
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
|
|
0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
|
|
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
|
|
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
|
|
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
|
|
0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
|
|
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
|
|
0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
|
|
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
|
|
0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
|
|
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
|
|
0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
|
|
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
|
|
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
|
|
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
|
|
0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
|
|
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
|
|
0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
|
|
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
|
|
0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
|
|
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
|
|
0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
|
|
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
|
|
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
|
|
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
|
|
0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
|
|
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
|
|
0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
|
|
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
|
|
0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
|
|
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
|
|
0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
|
|
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
|
|
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
|
|
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
|
|
0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
|
|
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
|
|
0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
|
|
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
|
|
0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
|
|
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
|
|
};
|
|
|
|
#define INIT_CRC64V0(crc) ((crc).crc1 = 0xffffffff, (crc).crc2 = 0xffffffff)
|
|
#define FIN_CRC64V0(crc) ((crc).crc1 ^= 0xffffffff, (crc).crc2 ^= 0xffffffff)
|
|
#define COMP_CRC64V0(crc, data, len) \
|
|
{\
|
|
uint32 __c1 = (crc).crc1;\
|
|
uint32 __c2 = (crc).crc2;\
|
|
char *__data = (char *) (data);\
|
|
uint32 __len = (len);\
|
|
\
|
|
while (__len >= 2)\
|
|
{\
|
|
__c1 = crc_tableV0[(__c1 ^ *__data++) & 0xff] ^ (__c1 >> 8);\
|
|
__c2 = crc_tableV0[(__c2 ^ *__data++) & 0xff] ^ (__c2 >> 8);\
|
|
__len -= 2;\
|
|
}\
|
|
if (__len > 0)\
|
|
__c1 = crc_tableV0[(__c1 ^ *__data++) & 0xff] ^ (__c1 >> 8);\
|
|
(crc).crc1 = __c1;\
|
|
(crc).crc2 = __c2;\
|
|
}
|
|
|
|
#define EQ_CRC64V0(c1,c2) ((c1).crc1 == (c2).crc1 && (c1).crc2 == (c2).crc2)
|
|
|
|
|
|
#define LOCALE_NAME_BUFLEN_V0 128
|
|
|
|
typedef struct ControlFileDataV0
|
|
{
|
|
crc64V0 crc;
|
|
uint32 logId; /* current log file id */
|
|
uint32 logSeg; /* current log file segment (1-based) */
|
|
XLogRecPtr checkPoint; /* last check point record ptr */
|
|
time_t time; /* time stamp of last modification */
|
|
DBState state; /* see enum above */
|
|
uint32 blcksz; /* block size for this DB */
|
|
uint32 relseg_size; /* blocks per segment of large relation */
|
|
uint32 catalog_version_no; /* internal version number */
|
|
char lc_collate[LOCALE_NAME_BUFLEN_V0];
|
|
char lc_ctype[LOCALE_NAME_BUFLEN_V0];
|
|
char archdir[MAXPGPATH]; /* where to move offline log files */
|
|
} ControlFileDataV0;
|
|
|
|
typedef struct CheckPointV0
|
|
{
|
|
XLogRecPtr redo; /* next RecPtr available when we */
|
|
/* began to create CheckPoint */
|
|
/* (i.e. REDO start point) */
|
|
XLogRecPtr undo; /* first record of oldest in-progress */
|
|
/* transaction when we started */
|
|
/* (i.e. UNDO end point) */
|
|
StartUpID ThisStartUpID;
|
|
TransactionId nextXid;
|
|
Oid nextOid;
|
|
bool Shutdown;
|
|
} CheckPointV0;
|
|
|
|
typedef struct XLogRecordV0
|
|
{
|
|
crc64V0 xl_crc;
|
|
XLogRecPtr xl_prev; /* ptr to previous record in log */
|
|
XLogRecPtr xl_xact_prev; /* ptr to previous record of this xact */
|
|
TransactionId xl_xid; /* xact id */
|
|
uint16 xl_len; /* total len of record *data* */
|
|
uint8 xl_info;
|
|
RmgrId xl_rmid; /* resource manager inserted this record */
|
|
} XLogRecordV0;
|
|
|
|
#define SizeOfXLogRecordV0 DOUBLEALIGN(sizeof(XLogRecordV0))
|
|
|
|
typedef struct XLogContRecordV0
|
|
{
|
|
uint16 xl_len; /* len of data left */
|
|
} XLogContRecordV0;
|
|
|
|
#define SizeOfXLogContRecordV0 DOUBLEALIGN(sizeof(XLogContRecordV0))
|
|
|
|
#define XLOG_PAGE_MAGIC_V0 0x17345168
|
|
|
|
typedef struct XLogPageHeaderDataV0
|
|
{
|
|
uint32 xlp_magic;
|
|
uint16 xlp_info;
|
|
} XLogPageHeaderDataV0;
|
|
|
|
#define SizeOfXLogPHDV0 DOUBLEALIGN(sizeof(XLogPageHeaderDataV0))
|
|
|
|
typedef XLogPageHeaderDataV0 *XLogPageHeaderV0;
|
|
|
|
|
|
static bool RecordIsValidV0(XLogRecordV0 * record);
|
|
static XLogRecordV0 *ReadRecordV0(XLogRecPtr *RecPtr, char *buffer);
|
|
static bool ValidXLOGHeaderV0(XLogPageHeaderV0 hdr);
|
|
|
|
|
|
/*
|
|
* Try to interpret pg_control contents as "version 0" format.
|
|
*/
|
|
static bool
|
|
CheckControlVersion0(char *buffer, int len)
|
|
{
|
|
crc64V0 crc;
|
|
ControlFileDataV0 *oldfile;
|
|
XLogRecordV0 *record;
|
|
CheckPointV0 *oldchkpt;
|
|
|
|
if (len < sizeof(ControlFileDataV0))
|
|
return false;
|
|
/* Check CRC the version-0 way. */
|
|
INIT_CRC64V0(crc);
|
|
COMP_CRC64V0(crc,
|
|
buffer + sizeof(crc64V0),
|
|
sizeof(ControlFileDataV0) - sizeof(crc64V0));
|
|
FIN_CRC64V0(crc);
|
|
|
|
if (!EQ_CRC64V0(crc, ((ControlFileDataV0 *) buffer)->crc))
|
|
return false;
|
|
|
|
/* Valid data, convert useful fields to new-style pg_control format */
|
|
oldfile = (ControlFileDataV0 *) buffer;
|
|
|
|
memset(&ControlFile, 0, sizeof(ControlFile));
|
|
|
|
ControlFile.pg_control_version = PG_CONTROL_VERSION;
|
|
ControlFile.catalog_version_no = oldfile->catalog_version_no;
|
|
|
|
ControlFile.state = oldfile->state;
|
|
ControlFile.logId = oldfile->logId;
|
|
ControlFile.logSeg = oldfile->logSeg;
|
|
|
|
ControlFile.blcksz = oldfile->blcksz;
|
|
ControlFile.relseg_size = oldfile->relseg_size;
|
|
strcpy(ControlFile.lc_collate, oldfile->lc_collate);
|
|
strcpy(ControlFile.lc_ctype, oldfile->lc_ctype);
|
|
|
|
/*
|
|
* Since this format did not include a copy of the latest checkpoint
|
|
* record, we have to go rooting in the old XLOG to get that.
|
|
*/
|
|
record = ReadRecordV0(&oldfile->checkPoint,
|
|
(char *) malloc(_INTL_MAXLOGRECSZ));
|
|
if (record == NULL)
|
|
{
|
|
|
|
/*
|
|
* We have to guess at the checkpoint contents.
|
|
*/
|
|
guessed = true;
|
|
ControlFile.checkPointCopy.ThisStartUpID = 0;
|
|
ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
|
|
ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
|
|
return true;
|
|
}
|
|
oldchkpt = (CheckPointV0 *) XLogRecGetData(record);
|
|
|
|
ControlFile.checkPointCopy.ThisStartUpID = oldchkpt->ThisStartUpID;
|
|
ControlFile.checkPointCopy.nextXid = oldchkpt->nextXid;
|
|
ControlFile.checkPointCopy.nextOid = oldchkpt->nextOid;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* CRC-check an XLOG V0 record. We do not believe the contents of an XLOG
|
|
* record (other than to the minimal extent of computing the amount of
|
|
* data to read in) until we've checked the CRCs.
|
|
*
|
|
* We assume all of the record has been read into memory at *record.
|
|
*/
|
|
static bool
|
|
RecordIsValidV0(XLogRecordV0 * record)
|
|
{
|
|
crc64V0 crc;
|
|
uint32 len = record->xl_len;
|
|
|
|
/*
|
|
* NB: this code is not right for V0 records containing backup blocks,
|
|
* but for now it's only going to be applied to checkpoint records, so
|
|
* I'm not going to worry about it...
|
|
*/
|
|
INIT_CRC64V0(crc);
|
|
COMP_CRC64V0(crc, XLogRecGetData(record), len);
|
|
COMP_CRC64V0(crc, (char *) record + sizeof(crc64V0),
|
|
SizeOfXLogRecordV0 - sizeof(crc64V0));
|
|
FIN_CRC64V0(crc);
|
|
|
|
if (!EQ_CRC64V0(record->xl_crc, crc))
|
|
return false;
|
|
|
|
return (true);
|
|
}
|
|
|
|
/*
|
|
* Attempt to read an XLOG V0 record at recptr.
|
|
*
|
|
* If no valid record is available, returns NULL.
|
|
*
|
|
* buffer is a workspace at least _INTL_MAXLOGRECSZ bytes long. It is needed
|
|
* to reassemble a record that crosses block boundaries. Note that on
|
|
* successful return, the returned record pointer always points at buffer.
|
|
*/
|
|
static XLogRecordV0 *
|
|
ReadRecordV0(XLogRecPtr *RecPtr, char *buffer)
|
|
{
|
|
static int readFile = -1;
|
|
static uint32 readId = 0;
|
|
static uint32 readSeg = 0;
|
|
static uint32 readOff = 0;
|
|
static char *readBuf = NULL;
|
|
|
|
XLogRecordV0 *record;
|
|
uint32 len,
|
|
total_len;
|
|
uint32 targetPageOff;
|
|
|
|
if (readBuf == NULL)
|
|
readBuf = (char *) malloc(BLCKSZ);
|
|
|
|
XLByteToSeg(*RecPtr, readId, readSeg);
|
|
if (readFile < 0)
|
|
{
|
|
readFile = XLogFileOpen(readId, readSeg);
|
|
if (readFile < 0)
|
|
goto next_record_is_invalid;
|
|
readOff = (uint32) (-1);/* force read to occur below */
|
|
}
|
|
|
|
targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / BLCKSZ) * BLCKSZ;
|
|
if (readOff != targetPageOff)
|
|
{
|
|
readOff = targetPageOff;
|
|
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
|
|
goto next_record_is_invalid;
|
|
if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
|
|
goto next_record_is_invalid;
|
|
if (!ValidXLOGHeaderV0((XLogPageHeaderV0) readBuf))
|
|
goto next_record_is_invalid;
|
|
}
|
|
if ((((XLogPageHeaderV0) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
|
|
RecPtr->xrecoff % BLCKSZ == SizeOfXLogPHDV0)
|
|
goto next_record_is_invalid;
|
|
record = (XLogRecordV0 *) ((char *) readBuf + RecPtr->xrecoff % BLCKSZ);
|
|
|
|
if (record->xl_len == 0)
|
|
goto next_record_is_invalid;
|
|
|
|
/*
|
|
* Compute total length of record including any appended backup
|
|
* blocks.
|
|
*/
|
|
total_len = SizeOfXLogRecordV0 + record->xl_len;
|
|
|
|
/*
|
|
* Make sure it will fit in buffer (currently, it is mechanically
|
|
* impossible for this test to fail, but it seems like a good idea
|
|
* anyway).
|
|
*/
|
|
if (total_len > _INTL_MAXLOGRECSZ)
|
|
goto next_record_is_invalid;
|
|
len = BLCKSZ - RecPtr->xrecoff % BLCKSZ;
|
|
if (total_len > len)
|
|
{
|
|
/* Need to reassemble record */
|
|
XLogContRecordV0 *contrecord;
|
|
uint32 gotlen = len;
|
|
|
|
memcpy(buffer, record, len);
|
|
record = (XLogRecordV0 *) buffer;
|
|
buffer += len;
|
|
for (;;)
|
|
{
|
|
readOff += BLCKSZ;
|
|
if (readOff >= XLogSegSize)
|
|
{
|
|
close(readFile);
|
|
readFile = -1;
|
|
NextLogSeg(readId, readSeg);
|
|
readFile = XLogFileOpen(readId, readSeg);
|
|
if (readFile < 0)
|
|
goto next_record_is_invalid;
|
|
readOff = 0;
|
|
}
|
|
if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
|
|
goto next_record_is_invalid;
|
|
if (!ValidXLOGHeaderV0((XLogPageHeaderV0) readBuf))
|
|
goto next_record_is_invalid;
|
|
if (!(((XLogPageHeaderV0) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD))
|
|
goto next_record_is_invalid;
|
|
contrecord = (XLogContRecordV0 *) ((char *) readBuf + SizeOfXLogPHDV0);
|
|
if (contrecord->xl_len == 0 ||
|
|
total_len != (contrecord->xl_len + gotlen))
|
|
goto next_record_is_invalid;
|
|
len = BLCKSZ - SizeOfXLogPHDV0 - SizeOfXLogContRecordV0;
|
|
if (contrecord->xl_len > len)
|
|
{
|
|
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecordV0, len);
|
|
gotlen += len;
|
|
buffer += len;
|
|
continue;
|
|
}
|
|
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecordV0,
|
|
contrecord->xl_len);
|
|
break;
|
|
}
|
|
if (!RecordIsValidV0(record))
|
|
goto next_record_is_invalid;
|
|
return record;
|
|
}
|
|
|
|
/* Record does not cross a page boundary */
|
|
if (!RecordIsValidV0(record))
|
|
goto next_record_is_invalid;
|
|
memcpy(buffer, record, total_len);
|
|
return (XLogRecordV0 *) buffer;
|
|
|
|
next_record_is_invalid:;
|
|
close(readFile);
|
|
readFile = -1;
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Check whether the xlog header of a page just read in looks valid.
|
|
*
|
|
* This is just a convenience subroutine to avoid duplicated code in
|
|
* ReadRecord. It's not intended for use from anywhere else.
|
|
*/
|
|
static bool
|
|
ValidXLOGHeaderV0(XLogPageHeaderV0 hdr)
|
|
{
|
|
if (hdr->xlp_magic != XLOG_PAGE_MAGIC_V0)
|
|
return false;
|
|
if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0)
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
/******************* end of routines for old XLOG format *******************/
|
|
|
|
|
|
/*
|
|
* Guess at pg_control values when we can't read the old ones.
|
|
*/
|
|
static void
|
|
GuessControlValues(void)
|
|
{
|
|
#ifdef USE_LOCALE
|
|
char *localeptr;
|
|
|
|
#endif
|
|
|
|
/*
|
|
* Set up a completely default set of pg_control values.
|
|
*/
|
|
guessed = true;
|
|
memset(&ControlFile, 0, sizeof(ControlFile));
|
|
|
|
ControlFile.pg_control_version = PG_CONTROL_VERSION;
|
|
ControlFile.catalog_version_no = CATALOG_VERSION_NO;
|
|
|
|
ControlFile.checkPointCopy.redo.xlogid = 0;
|
|
ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogPHD;
|
|
ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
|
|
ControlFile.checkPointCopy.ThisStartUpID = 0;
|
|
ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
|
|
ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
|
|
ControlFile.checkPointCopy.time = time(NULL);
|
|
|
|
ControlFile.state = DB_SHUTDOWNED;
|
|
ControlFile.time = time(NULL);
|
|
ControlFile.logId = 0;
|
|
ControlFile.logSeg = 1;
|
|
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
|
|
|
|
ControlFile.blcksz = BLCKSZ;
|
|
ControlFile.relseg_size = RELSEG_SIZE;
|
|
#ifdef USE_LOCALE
|
|
localeptr = setlocale(LC_COLLATE, "");
|
|
if (!localeptr)
|
|
{
|
|
fprintf(stderr, "Invalid LC_COLLATE setting\n");
|
|
exit(1);
|
|
}
|
|
StrNCpy(ControlFile.lc_collate, localeptr, LOCALE_NAME_BUFLEN);
|
|
localeptr = setlocale(LC_CTYPE, "");
|
|
if (!localeptr)
|
|
{
|
|
fprintf(stderr, "Invalid LC_CTYPE setting\n");
|
|
exit(1);
|
|
}
|
|
StrNCpy(ControlFile.lc_ctype, localeptr, LOCALE_NAME_BUFLEN);
|
|
#else
|
|
strcpy(ControlFile.lc_collate, "C");
|
|
strcpy(ControlFile.lc_ctype, "C");
|
|
#endif
|
|
|
|
/*
|
|
* XXX eventually, should try to grovel through old XLOG to develop
|
|
* more accurate values for startupid, nextXID, and nextOID.
|
|
*/
|
|
}
|
|
|
|
|
|
/*
|
|
* Print the guessed pg_control values when we had to guess.
|
|
*
|
|
* NB: this display should be just those fields that will not be
|
|
* reset by RewriteControlFile().
|
|
*/
|
|
static void
|
|
PrintControlValues(void)
|
|
{
|
|
printf("Guessed-at pg_control values:\n\n"
|
|
"pg_control version number: %u\n"
|
|
"Catalog version number: %u\n"
|
|
"Current log file id: %u\n"
|
|
"Next log file segment: %u\n"
|
|
"Latest checkpoint's StartUpID: %u\n"
|
|
"Latest checkpoint's NextXID: %u\n"
|
|
"Latest checkpoint's NextOID: %u\n"
|
|
"Database block size: %u\n"
|
|
"Blocks per segment of large relation: %u\n"
|
|
"LC_COLLATE: %s\n"
|
|
"LC_CTYPE: %s\n",
|
|
|
|
ControlFile.pg_control_version,
|
|
ControlFile.catalog_version_no,
|
|
ControlFile.logId,
|
|
ControlFile.logSeg,
|
|
ControlFile.checkPointCopy.ThisStartUpID,
|
|
ControlFile.checkPointCopy.nextXid,
|
|
ControlFile.checkPointCopy.nextOid,
|
|
ControlFile.blcksz,
|
|
ControlFile.relseg_size,
|
|
ControlFile.lc_collate,
|
|
ControlFile.lc_ctype);
|
|
}
|
|
|
|
|
|
/*
|
|
* Write out the new pg_control file.
|
|
*/
|
|
static void
|
|
RewriteControlFile(void)
|
|
{
|
|
int fd;
|
|
char buffer[BLCKSZ]; /* need not be aligned */
|
|
|
|
/*
|
|
* Adjust fields as needed to force an empty XLOG starting at the next
|
|
* available segment.
|
|
*/
|
|
newXlogId = ControlFile.logId;
|
|
newXlogSeg = ControlFile.logSeg;
|
|
/* be sure we wrap around correctly at end of a logfile */
|
|
NextLogSeg(newXlogId, newXlogSeg);
|
|
|
|
ControlFile.checkPointCopy.redo.xlogid = newXlogId;
|
|
ControlFile.checkPointCopy.redo.xrecoff =
|
|
newXlogSeg * XLogSegSize + SizeOfXLogPHD;
|
|
ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
|
|
ControlFile.checkPointCopy.time = time(NULL);
|
|
|
|
ControlFile.state = DB_SHUTDOWNED;
|
|
ControlFile.time = time(NULL);
|
|
ControlFile.logId = newXlogId;
|
|
ControlFile.logSeg = newXlogSeg + 1;
|
|
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
|
|
ControlFile.prevCheckPoint.xlogid = 0;
|
|
ControlFile.prevCheckPoint.xrecoff = 0;
|
|
|
|
/* Contents are protected with a CRC */
|
|
INIT_CRC64(ControlFile.crc);
|
|
COMP_CRC64(ControlFile.crc,
|
|
(char *) &ControlFile + sizeof(crc64),
|
|
sizeof(ControlFileData) - sizeof(crc64));
|
|
FIN_CRC64(ControlFile.crc);
|
|
|
|
/*
|
|
* We write out BLCKSZ bytes into pg_control, zero-padding the excess
|
|
* over sizeof(ControlFileData). This reduces the odds of
|
|
* premature-EOF errors when reading pg_control. We'll still fail
|
|
* when we check the contents of the file, but hopefully with a more
|
|
* specific error than "couldn't read pg_control".
|
|
*/
|
|
if (sizeof(ControlFileData) > BLCKSZ)
|
|
{
|
|
fprintf(stderr, "sizeof(ControlFileData) is too large ... fix xlog.c\n");
|
|
exit(1);
|
|
}
|
|
|
|
memset(buffer, 0, BLCKSZ);
|
|
memcpy(buffer, &ControlFile, sizeof(ControlFileData));
|
|
|
|
unlink(ControlFilePath);
|
|
|
|
fd = open(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR);
|
|
if (fd < 0)
|
|
{
|
|
perror("RewriteControlFile failed to create pg_control file");
|
|
exit(1);
|
|
}
|
|
|
|
errno = 0;
|
|
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
|
|
{
|
|
/* if write didn't set errno, assume problem is no disk space */
|
|
if (errno == 0)
|
|
errno = ENOSPC;
|
|
perror("RewriteControlFile failed to write pg_control file");
|
|
exit(1);
|
|
}
|
|
|
|
if (fsync(fd) != 0)
|
|
{
|
|
perror("fsync");
|
|
exit(1);
|
|
}
|
|
|
|
close(fd);
|
|
}
|
|
|
|
|
|
/*
|
|
* Remove existing XLOG files
|
|
*/
|
|
static void
|
|
KillExistingXLOG(void)
|
|
{
|
|
DIR *xldir;
|
|
struct dirent *xlde;
|
|
char path[MAXPGPATH];
|
|
|
|
xldir = opendir(XLogDir);
|
|
if (xldir == NULL)
|
|
{
|
|
perror("KillExistingXLOG: cannot open $PGDATA/pg_xlog directory");
|
|
exit(1);
|
|
}
|
|
|
|
errno = 0;
|
|
while ((xlde = readdir(xldir)) != NULL)
|
|
{
|
|
if (strlen(xlde->d_name) == 16 &&
|
|
strspn(xlde->d_name, "0123456789ABCDEF") == 16)
|
|
{
|
|
sprintf(path, "%s/%s", XLogDir, xlde->d_name);
|
|
if (unlink(path) < 0)
|
|
{
|
|
perror(path);
|
|
exit(1);
|
|
}
|
|
}
|
|
errno = 0;
|
|
}
|
|
if (errno)
|
|
{
|
|
perror("KillExistingXLOG: cannot read $PGDATA/pg_xlog directory");
|
|
exit(1);
|
|
}
|
|
closedir(xldir);
|
|
}
|
|
|
|
|
|
/*
|
|
* Write an empty XLOG file, containing only the checkpoint record
|
|
* already set up in ControlFile.
|
|
*/
|
|
static void
|
|
WriteEmptyXLOG(void)
|
|
{
|
|
char *buffer;
|
|
XLogPageHeader page;
|
|
XLogRecord *record;
|
|
crc64 crc;
|
|
char path[MAXPGPATH];
|
|
int fd;
|
|
int nbytes;
|
|
|
|
/* Use malloc() to ensure buffer is MAXALIGNED */
|
|
buffer = (char *) malloc(BLCKSZ);
|
|
page = (XLogPageHeader) buffer;
|
|
|
|
/* Set up the first page with initial record */
|
|
memset(buffer, 0, BLCKSZ);
|
|
page->xlp_magic = XLOG_PAGE_MAGIC;
|
|
page->xlp_info = 0;
|
|
page->xlp_sui = ControlFile.checkPointCopy.ThisStartUpID;
|
|
page->xlp_pageaddr.xlogid =
|
|
ControlFile.checkPointCopy.redo.xlogid;
|
|
page->xlp_pageaddr.xrecoff =
|
|
ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogPHD;
|
|
record = (XLogRecord *) ((char *) page + SizeOfXLogPHD);
|
|
record->xl_prev.xlogid = 0;
|
|
record->xl_prev.xrecoff = 0;
|
|
record->xl_xact_prev = record->xl_prev;
|
|
record->xl_xid = InvalidTransactionId;
|
|
record->xl_len = sizeof(CheckPoint);
|
|
record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
|
|
record->xl_rmid = RM_XLOG_ID;
|
|
memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
|
|
sizeof(CheckPoint));
|
|
|
|
INIT_CRC64(crc);
|
|
COMP_CRC64(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
|
|
COMP_CRC64(crc, (char *) record + sizeof(crc64),
|
|
SizeOfXLogRecord - sizeof(crc64));
|
|
FIN_CRC64(crc);
|
|
record->xl_crc = crc;
|
|
|
|
/* Write the first page */
|
|
XLogFileName(path, newXlogId, newXlogSeg);
|
|
|
|
unlink(path);
|
|
|
|
fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
|
|
S_IRUSR | S_IWUSR);
|
|
if (fd < 0)
|
|
{
|
|
perror(path);
|
|
exit(1);
|
|
}
|
|
|
|
errno = 0;
|
|
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
|
|
{
|
|
/* if write didn't set errno, assume problem is no disk space */
|
|
if (errno == 0)
|
|
errno = ENOSPC;
|
|
perror("WriteEmptyXLOG: failed to write xlog file");
|
|
exit(1);
|
|
}
|
|
|
|
/* Fill the rest of the file with zeroes */
|
|
memset(buffer, 0, BLCKSZ);
|
|
for (nbytes = BLCKSZ; nbytes < XLogSegSize; nbytes += BLCKSZ)
|
|
{
|
|
errno = 0;
|
|
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
|
|
{
|
|
if (errno == 0)
|
|
errno = ENOSPC;
|
|
perror("WriteEmptyXLOG: failed to write xlog file");
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
if (fsync(fd) != 0)
|
|
{
|
|
perror("fsync");
|
|
exit(1);
|
|
}
|
|
|
|
close(fd);
|
|
}
|
|
|
|
|
|
static void
|
|
usage(void)
|
|
{
|
|
fprintf(stderr, "Usage: pg_resetxlog [-f] [-n] PGDataDirectory\n\n"
|
|
" -f\tforce update to be done\n"
|
|
" -n\tno update, just show extracted pg_control values (for testing)\n");
|
|
exit(1);
|
|
}
|
|
|
|
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
int argn;
|
|
bool force = false;
|
|
bool noupdate = false;
|
|
int fd;
|
|
char path[MAXPGPATH];
|
|
|
|
for (argn = 1; argn < argc; argn++)
|
|
{
|
|
if (argv[argn][0] != '-')
|
|
break; /* end of switches */
|
|
if (strcmp(argv[argn], "-f") == 0)
|
|
force = true;
|
|
else if (strcmp(argv[argn], "-n") == 0)
|
|
noupdate = true;
|
|
else
|
|
usage();
|
|
}
|
|
|
|
if (argn != argc - 1) /* one required non-switch argument */
|
|
usage();
|
|
|
|
DataDir = argv[argn++];
|
|
|
|
snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);
|
|
|
|
snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
|
|
|
|
/*
|
|
* Check for a postmaster lock file --- if there is one, refuse to
|
|
* proceed, on grounds we might be interfering with a live
|
|
* installation.
|
|
*/
|
|
snprintf(path, MAXPGPATH, "%s/postmaster.pid", DataDir);
|
|
|
|
if ((fd = open(path, O_RDONLY)) < 0)
|
|
{
|
|
if (errno != ENOENT)
|
|
{
|
|
perror("Failed to open $PGDATA/postmaster.pid for reading");
|
|
exit(1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fprintf(stderr, "Lock file '%s' exists --- is a postmaster running?\n"
|
|
"If not, delete the lock file and try again.\n",
|
|
path);
|
|
exit(1);
|
|
}
|
|
|
|
/*
|
|
* Attempt to read the existing pg_control file
|
|
*/
|
|
if (!ReadControlFile())
|
|
GuessControlValues();
|
|
|
|
/*
|
|
* If we had to guess anything, and -f was not given, just print the
|
|
* guessed values and exit. Also print if -n is given.
|
|
*/
|
|
if ((guessed && !force) || noupdate)
|
|
{
|
|
PrintControlValues();
|
|
if (!noupdate)
|
|
printf("\nIf these values seem acceptable, use -f to force reset.\n");
|
|
exit(1);
|
|
}
|
|
|
|
/*
|
|
* Don't reset from a dirty pg_control without -f, either.
|
|
*/
|
|
if (ControlFile.state != DB_SHUTDOWNED && !force)
|
|
{
|
|
printf("The database was not shut down cleanly.\n"
|
|
"Resetting the xlog may cause data to be lost!\n"
|
|
"If you want to proceed anyway, use -f to force reset.\n");
|
|
exit(1);
|
|
}
|
|
|
|
/*
|
|
* Else, do the dirty deed.
|
|
*/
|
|
RewriteControlFile();
|
|
KillExistingXLOG();
|
|
WriteEmptyXLOG();
|
|
|
|
printf("XLOG reset.\n");
|
|
return 0;
|
|
}
|