From 09c2e7cd2ff0b884625c37ce8249832820c58710 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Mon, 17 Jul 2017 12:03:35 -0400
Subject: [PATCH] hash: Fix write-ahead logging bugs related to init forks.

One, logging for CREATE INDEX was oblivious to the fact that when
an unlogged table is created, *only* operations on the init fork
should be logged.

Two, init fork buffers need to be flushed after they are written;
otherwise, a filesystem-level copy following recovery may do the
wrong thing.

(There may be a better fix for this issue than the one used here,
but this is transposed from the similar logic already present in
XLogReadBufferForRedoExtended, and a broader refactoring after
beta2 seems inadvisable.)

Amit Kapila, reviewed by Ashutosh Sharma, Kyotaro Horiguchi, and
Michael Paquier

Discussion: http://postgr.es/m/CAA4eK1JpcMsEtOL_J7WODumeEfyrPi7FPYHeVdS7fyyrCrgp4w@mail.gmail.com
---
 src/backend/access/hash/hash_xlog.c | 27 +++++++++++++++++++++++++++
 src/backend/access/hash/hashpage.c  | 23 ++++++++++++++++-------
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/src/backend/access/hash/hash_xlog.c b/src/backend/access/hash/hash_xlog.c
index 429af11f4d..67a856c142 100644
--- a/src/backend/access/hash/hash_xlog.c
+++ b/src/backend/access/hash/hash_xlog.c
@@ -33,6 +33,7 @@ hash_xlog_init_meta_page(XLogReaderState *record)
 	XLogRecPtr	lsn = record->EndRecPtr;
 	Page		page;
 	Buffer		metabuf;
+	ForkNumber	forknum;
 
 	xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
 
@@ -44,6 +45,17 @@ hash_xlog_init_meta_page(XLogReaderState *record)
 	page = (Page) BufferGetPage(metabuf);
 	PageSetLSN(page, lsn);
 	MarkBufferDirty(metabuf);
+
+	/*
+	 * Force the on-disk state of init forks to always be in sync with the
+	 * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
+	 * special handling for init forks as create index operations don't log a
+	 * full page image of the metapage.
+	 */
+	XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
+	if (forknum == INIT_FORKNUM)
+		FlushOneBuffer(metabuf);
+
 	/* all done */
 	UnlockReleaseBuffer(metabuf);
 }
@@ -60,6 +72,7 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
 	Page		page;
 	HashMetaPage metap;
 	uint32		num_buckets;
+	ForkNumber	forknum;
 
 	xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
 
@@ -70,6 +83,16 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
 	_hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
 	PageSetLSN(BufferGetPage(bitmapbuf), lsn);
 	MarkBufferDirty(bitmapbuf);
+
+	/*
+	 * Force the on-disk state of init forks to always be in sync with the
+	 * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
+	 * special handling for init forks as create index operations don't log a
+	 * full page image of the metapage.
+	 */
+	XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
+	if (forknum == INIT_FORKNUM)
+		FlushOneBuffer(bitmapbuf);
 	UnlockReleaseBuffer(bitmapbuf);
 
 	/* add the new bitmap page to the metapage's list of bitmaps */
@@ -90,6 +113,10 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
 
 		PageSetLSN(page, lsn);
 		MarkBufferDirty(metabuf);
+
+		XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
+		if (forknum == INIT_FORKNUM)
+			FlushOneBuffer(metabuf);
 	}
 	if (BufferIsValid(metabuf))
 		UnlockReleaseBuffer(metabuf);
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 1cb18a7513..d5b6502775 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -345,12 +345,20 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
 	int32		ffactor;
 	uint32		num_buckets;
 	uint32		i;
+	bool		use_wal;
 
 	/* safety check */
 	if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
 		elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
 			 RelationGetRelationName(rel));
 
+	/*
+	 * WAL log creation of pages if the relation is persistent, or this is the
+	 * init fork.  Init forks for unlogged relations always need to be WAL
+	 * logged.
+	 */
+	use_wal = RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM;
+
 	/*
 	 * Determine the target fill factor (in tuples per bucket) for this index.
 	 * The idea is to make the fill factor correspond to pages about as full
@@ -384,7 +392,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
 	metap = HashPageGetMeta(pg);
 
 	/* XLOG stuff */
-	if (RelationNeedsWAL(rel))
+	if (use_wal)
 	{
 		xl_hash_init_meta_page xlrec;
 		XLogRecPtr	recptr;
@@ -427,11 +435,12 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
 		_hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false);
 		MarkBufferDirty(buf);
 
-		log_newpage(&rel->rd_node,
-					forkNum,
-					blkno,
-					BufferGetPage(buf),
-					true);
+		if (use_wal)
+			log_newpage(&rel->rd_node,
+						forkNum,
+						blkno,
+						BufferGetPage(buf),
+						true);
 
 		_hash_relbuf(rel, buf);
 	}
@@ -459,7 +468,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
 	MarkBufferDirty(metabuf);
 
 	/* XLOG stuff */
-	if (RelationNeedsWAL(rel))
+	if (use_wal)
 	{
 		xl_hash_init_bitmap_page xlrec;
 		XLogRecPtr	recptr;
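
The build-side fix reduces to a single predicate. Below is a sketch of that
rule in isolation, not code from the patch: the helper name
hash_build_needs_wal is hypothetical (the patch inlines the test into a local
use_wal flag in _hash_init), and the headers assumed are PostgreSQL-10-era.

	#include "postgres.h"
	#include "common/relpath.h"	/* ForkNumber, INIT_FORKNUM */
	#include "utils/rel.h"		/* Relation, RelationNeedsWAL() */

	/*
	 * RelationNeedsWAL() is false for an unlogged relation, so gating all
	 * logging on it alone skips WAL for the entire index build.  But the
	 * init fork is the one part of an unlogged index that must be
	 * WAL-logged: recovery restores the init fork and then copies it over
	 * the main fork to reset the relation to its empty state.
	 */
	static inline bool
	hash_build_needs_wal(Relation rel, ForkNumber forkNum)
	{
		return RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM;
	}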
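
The redo-side fix repeats one small pattern at three call sites. These
init-page records carry no full-page image, so the init-fork flush that
XLogReadBufferForRedoExtended performs when restoring a full-page image never
fires here; each redo routine must flush by hand. A condensed sketch with a
hypothetical helper name (the patch deliberately inlines the pattern rather
than refactoring, per the note about beta2):

	#include "postgres.h"
	#include "access/xlogreader.h"	/* XLogReaderState, XLogRecGetBlockTag() */
	#include "common/relpath.h"	/* ForkNumber, INIT_FORKNUM */
	#include "storage/bufmgr.h"	/* Buffer, FlushOneBuffer() */

	/*
	 * After redo has rebuilt a page in shared buffers, look up which fork
	 * the record's block reference targets; if it is the init fork, write
	 * the buffer out immediately.  The end-of-recovery reset of unlogged
	 * relations copies the init fork at the filesystem level, bypassing
	 * shared buffers, so the on-disk page must already be current.
	 */
	static void
	flush_buffer_if_init_fork(XLogReaderState *record, uint8 block_id,
							  Buffer buf)
	{
		ForkNumber	forknum;

		XLogRecGetBlockTag(record, block_id, NULL, &forknum, NULL);
		if (forknum == INIT_FORKNUM)
			FlushOneBuffer(buf);
	}

As in the patch, the flush happens while the buffer is still pinned and
locked, i.e. before UnlockReleaseBuffer().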