mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-27 08:39:28 +08:00
Fix a bundle of GIN bugs:
- Fix a possible deadlock between UPDATE and VACUUM queries. The bug was never observed in 8.2, but it still exists there. HEAD is more sensitive to the bug after the recent "ring" of buffer improvements.
- Fix WAL creation: if the parent page is stored as is after a split, then the incomplete split isn't removed during replay. This happens rather rarely, only on large tables with a lot of updates/inserts.
- Fix WAL replay: there was a wrong test of XLR_BKP_BLOCK_* for the left page after deletion of a page. That caused a wrong rightlink field: it pointed to the deleted page.
- Add a check that clearing an incomplete split matches a recorded split.
- Clean up the incomplete split list after processing it.
None of these changes alter on-disk storage, so backpatch... But the second point may be an issue when replaying logs from a previous version.
This commit is contained in:
parent
6209cb3f7d
commit
07bd1db152
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.5 2006/11/12 06:55:53 neilc Exp $
|
* $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.5.2.1 2007/06/04 15:59:19 teodor Exp $
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -358,6 +358,7 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda
|
|||||||
static XLogRecData rdata[3];
|
static XLogRecData rdata[3];
|
||||||
int sizeofitem = GinSizeOfItem(page);
|
int sizeofitem = GinSizeOfItem(page);
|
||||||
static ginxlogInsert data;
|
static ginxlogInsert data;
|
||||||
|
int cnt=0;
|
||||||
|
|
||||||
*prdata = rdata;
|
*prdata = rdata;
|
||||||
Assert(GinPageIsData(page));
|
Assert(GinPageIsData(page));
|
||||||
@ -372,21 +373,33 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda
|
|||||||
data.isData = TRUE;
|
data.isData = TRUE;
|
||||||
data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
|
data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prevent full page write if child's split occurs. That is needed
|
||||||
|
* to remove incomplete splits while replaying WAL
|
||||||
|
*
|
||||||
|
* data.updateBlkno contains new block number (of newly created right page)
|
||||||
|
* for recently splited page.
|
||||||
|
*/
|
||||||
|
if ( data.updateBlkno == InvalidBlockNumber )
|
||||||
|
{
|
||||||
rdata[0].buffer = buf;
|
rdata[0].buffer = buf;
|
||||||
rdata[0].buffer_std = FALSE;
|
rdata[0].buffer_std = FALSE;
|
||||||
rdata[0].data = NULL;
|
rdata[0].data = NULL;
|
||||||
rdata[0].len = 0;
|
rdata[0].len = 0;
|
||||||
rdata[0].next = &rdata[1];
|
rdata[0].next = &rdata[1];
|
||||||
|
cnt++;
|
||||||
|
}
|
||||||
|
|
||||||
rdata[1].buffer = InvalidBuffer;
|
rdata[cnt].buffer = InvalidBuffer;
|
||||||
rdata[1].data = (char *) &data;
|
rdata[cnt].data = (char *) &data;
|
||||||
rdata[1].len = sizeof(ginxlogInsert);
|
rdata[cnt].len = sizeof(ginxlogInsert);
|
||||||
rdata[1].next = &rdata[2];
|
rdata[cnt].next = &rdata[cnt+1];
|
||||||
|
cnt++;
|
||||||
|
|
||||||
rdata[2].buffer = InvalidBuffer;
|
rdata[cnt].buffer = InvalidBuffer;
|
||||||
rdata[2].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
|
rdata[cnt].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
|
||||||
rdata[2].len = sizeofitem;
|
rdata[cnt].len = sizeofitem;
|
||||||
rdata[2].next = NULL;
|
rdata[cnt].next = NULL;
|
||||||
|
|
||||||
if (GinPageIsLeaf(page))
|
if (GinPageIsLeaf(page))
|
||||||
{
|
{
|
||||||
@ -402,7 +415,7 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda
|
|||||||
btree->curitem++;
|
btree->curitem++;
|
||||||
}
|
}
|
||||||
data.nitem = btree->curitem - savedPos;
|
data.nitem = btree->curitem - savedPos;
|
||||||
rdata[2].len = sizeofitem * data.nitem;
|
rdata[cnt].len = sizeofitem * data.nitem;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.5 2006/11/12 06:55:53 neilc Exp $
|
* $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.5.2.1 2007/06/04 15:59:19 teodor Exp $
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -354,6 +354,7 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prd
|
|||||||
static XLogRecData rdata[3];
|
static XLogRecData rdata[3];
|
||||||
OffsetNumber placed;
|
OffsetNumber placed;
|
||||||
static ginxlogInsert data;
|
static ginxlogInsert data;
|
||||||
|
int cnt=0;
|
||||||
|
|
||||||
*prdata = rdata;
|
*prdata = rdata;
|
||||||
data.updateBlkno = entryPreparePage(btree, page, off);
|
data.updateBlkno = entryPreparePage(btree, page, off);
|
||||||
@ -371,21 +372,33 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prd
|
|||||||
data.isData = false;
|
data.isData = false;
|
||||||
data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
|
data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prevent full page write if child's split occurs. That is needed
|
||||||
|
* to remove incomplete splits while replaying WAL
|
||||||
|
*
|
||||||
|
* data.updateBlkno contains new block number (of newly created right page)
|
||||||
|
* for recently splited page.
|
||||||
|
*/
|
||||||
|
if ( data.updateBlkno == InvalidBlockNumber )
|
||||||
|
{
|
||||||
rdata[0].buffer = buf;
|
rdata[0].buffer = buf;
|
||||||
rdata[0].buffer_std = TRUE;
|
rdata[0].buffer_std = TRUE;
|
||||||
rdata[0].data = NULL;
|
rdata[0].data = NULL;
|
||||||
rdata[0].len = 0;
|
rdata[0].len = 0;
|
||||||
rdata[0].next = &rdata[1];
|
rdata[0].next = &rdata[1];
|
||||||
|
cnt++;
|
||||||
|
}
|
||||||
|
|
||||||
rdata[1].buffer = InvalidBuffer;
|
rdata[cnt].buffer = InvalidBuffer;
|
||||||
rdata[1].data = (char *) &data;
|
rdata[cnt].data = (char *) &data;
|
||||||
rdata[1].len = sizeof(ginxlogInsert);
|
rdata[cnt].len = sizeof(ginxlogInsert);
|
||||||
rdata[1].next = &rdata[2];
|
rdata[cnt].next = &rdata[cnt+1];
|
||||||
|
cnt++;
|
||||||
|
|
||||||
rdata[2].buffer = InvalidBuffer;
|
rdata[cnt].buffer = InvalidBuffer;
|
||||||
rdata[2].data = (char *) btree->entry;
|
rdata[cnt].data = (char *) btree->entry;
|
||||||
rdata[2].len = IndexTupleSize(btree->entry);
|
rdata[cnt].len = IndexTupleSize(btree->entry);
|
||||||
rdata[2].next = NULL;
|
rdata[cnt].next = NULL;
|
||||||
|
|
||||||
btree->entry = NULL;
|
btree->entry = NULL;
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.4 2006/11/12 06:55:53 neilc Exp $
|
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.4.2.1 2007/06/04 15:59:19 teodor Exp $
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -17,22 +17,71 @@
|
|||||||
#include "catalog/index.h"
|
#include "catalog/index.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
|
|
||||||
static OffsetNumber
|
static bool
|
||||||
findItemInPage(Page page, ItemPointer item, OffsetNumber off)
|
findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
|
||||||
{
|
{
|
||||||
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
|
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
|
||||||
int res;
|
int res;
|
||||||
|
|
||||||
for (; off <= maxoff; off++)
|
if ( GinPageGetOpaque(page)->flags & GIN_DELETED )
|
||||||
|
/* page was deleted by concurrent vacuum */
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if ( *off > maxoff || *off == InvalidOffsetNumber )
|
||||||
|
res = -1;
|
||||||
|
else
|
||||||
|
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
|
||||||
|
|
||||||
|
if ( res == 0 )
|
||||||
{
|
{
|
||||||
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, off));
|
/* page isn't changed */
|
||||||
Assert(res >= 0);
|
return true;
|
||||||
|
}
|
||||||
|
else if ( res > 0 )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* some items was added before our position, look further to find
|
||||||
|
* it or first greater
|
||||||
|
*/
|
||||||
|
|
||||||
|
(*off)++;
|
||||||
|
for (; *off <= maxoff; (*off)++)
|
||||||
|
{
|
||||||
|
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
|
||||||
|
|
||||||
if (res == 0)
|
if (res == 0)
|
||||||
return off;
|
return true;
|
||||||
|
|
||||||
|
if (res < 0)
|
||||||
|
{
|
||||||
|
(*off)--;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* some items was deleted before our position, look from begining
|
||||||
|
* to find it or first greater
|
||||||
|
*/
|
||||||
|
|
||||||
|
for(*off = FirstOffsetNumber; *off<= maxoff; (*off)++)
|
||||||
|
{
|
||||||
|
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
|
||||||
|
|
||||||
|
if ( res == 0 )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (res < 0)
|
||||||
|
{
|
||||||
|
(*off)--;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return InvalidOffsetNumber;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -111,7 +160,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firs
|
|||||||
}
|
}
|
||||||
else if (entry->buffer != InvalidBuffer)
|
else if (entry->buffer != InvalidBuffer)
|
||||||
{
|
{
|
||||||
/* we should find place were we was stopped */
|
/* we should find place where we was stopped */
|
||||||
BlockNumber blkno;
|
BlockNumber blkno;
|
||||||
Page page;
|
Page page;
|
||||||
|
|
||||||
@ -125,7 +174,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firs
|
|||||||
page = BufferGetPage(entry->buffer);
|
page = BufferGetPage(entry->buffer);
|
||||||
|
|
||||||
/* try to find curItem in current buffer */
|
/* try to find curItem in current buffer */
|
||||||
if ((entry->offset = findItemInPage(page, &entry->curItem, entry->offset)) != InvalidOffsetNumber)
|
if ( findItemInPage(page, &entry->curItem, &entry->offset) )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* walk to right */
|
/* walk to right */
|
||||||
@ -136,11 +185,15 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firs
|
|||||||
LockBuffer(entry->buffer, GIN_SHARE);
|
LockBuffer(entry->buffer, GIN_SHARE);
|
||||||
page = BufferGetPage(entry->buffer);
|
page = BufferGetPage(entry->buffer);
|
||||||
|
|
||||||
if ((entry->offset = findItemInPage(page, &entry->curItem, FirstOffsetNumber)) != InvalidOffsetNumber)
|
entry->offset = InvalidOffsetNumber;
|
||||||
|
if ( findItemInPage(page, &entry->curItem, &entry->offset) )
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
elog(ERROR, "Logic error: lost previously founded ItemId");
|
/*
|
||||||
|
* curItem and any greated items was deleted by concurrent vacuum,
|
||||||
|
* so we finished scan with currrent entry
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.9 2006/11/30 16:22:32 teodor Exp $
|
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.9.2.1 2007/06/04 15:59:19 teodor Exp $
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -159,14 +159,14 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
|
|||||||
/*
|
/*
|
||||||
* We should be sure that we don't concurrent with inserts, insert process
|
* We should be sure that we don't concurrent with inserts, insert process
|
||||||
* never release root page until end (but it can unlock it and lock
|
* never release root page until end (but it can unlock it and lock
|
||||||
* again). If we lock root with with LockBufferForCleanup, new scan
|
* again). New scan can't start but previously started
|
||||||
* process can't begin, but previous may run. ginmarkpos/start* keeps
|
* ones work concurrently.
|
||||||
* buffer pinned, so we will wait for it. We lock only one posting tree in
|
|
||||||
* whole index, so, it's concurrent enough.. Side effect: after this is
|
|
||||||
* full complete, tree is unused by any other process
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
if ( isRoot )
|
||||||
LockBufferForCleanup(buffer);
|
LockBufferForCleanup(buffer);
|
||||||
|
else
|
||||||
|
LockBuffer(buffer, GIN_EXCLUSIVE);
|
||||||
|
|
||||||
Assert(GinPageIsData(page));
|
Assert(GinPageIsData(page));
|
||||||
|
|
||||||
@ -248,6 +248,8 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
|||||||
if (!isParentRoot) /* parent is already locked by
|
if (!isParentRoot) /* parent is already locked by
|
||||||
* LockBufferForCleanup() */
|
* LockBufferForCleanup() */
|
||||||
LockBuffer(pBuffer, GIN_EXCLUSIVE);
|
LockBuffer(pBuffer, GIN_EXCLUSIVE);
|
||||||
|
if (leftBlkno != InvalidBlockNumber)
|
||||||
|
LockBuffer(lBuffer, GIN_EXCLUSIVE);
|
||||||
|
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
@ -255,8 +257,6 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
|||||||
{
|
{
|
||||||
BlockNumber rightlink;
|
BlockNumber rightlink;
|
||||||
|
|
||||||
LockBuffer(lBuffer, GIN_EXCLUSIVE);
|
|
||||||
|
|
||||||
page = BufferGetPage(dBuffer);
|
page = BufferGetPage(dBuffer);
|
||||||
rightlink = GinPageGetOpaque(page)->rightlink;
|
rightlink = GinPageGetOpaque(page)->rightlink;
|
||||||
|
|
||||||
@ -274,6 +274,10 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
|||||||
PageDeletePostingItem(parentPage, myoff);
|
PageDeletePostingItem(parentPage, myoff);
|
||||||
|
|
||||||
page = BufferGetPage(dBuffer);
|
page = BufferGetPage(dBuffer);
|
||||||
|
/*
|
||||||
|
* we shouldn't change rightlink field to save
|
||||||
|
* workability of running search scan
|
||||||
|
*/
|
||||||
GinPageGetOpaque(page)->flags = GIN_DELETED;
|
GinPageGetOpaque(page)->flags = GIN_DELETED;
|
||||||
|
|
||||||
if (!gvs->index->rd_istemp)
|
if (!gvs->index->rd_istemp)
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.5 2006/10/04 00:29:48 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.5.2.1 2007/06/04 15:59:20 teodor Exp $
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
@ -53,6 +53,7 @@ static void
|
|||||||
forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updateBlkno)
|
forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updateBlkno)
|
||||||
{
|
{
|
||||||
ListCell *l;
|
ListCell *l;
|
||||||
|
bool found = false;
|
||||||
|
|
||||||
foreach(l, incomplete_splits)
|
foreach(l, incomplete_splits)
|
||||||
{
|
{
|
||||||
@ -61,9 +62,16 @@ forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updat
|
|||||||
if (RelFileNodeEquals(node, split->node) && leftBlkno == split->leftBlkno && updateBlkno == split->rightBlkno)
|
if (RelFileNodeEquals(node, split->node) && leftBlkno == split->leftBlkno && updateBlkno == split->rightBlkno)
|
||||||
{
|
{
|
||||||
incomplete_splits = list_delete_ptr(incomplete_splits, split);
|
incomplete_splits = list_delete_ptr(incomplete_splits, split);
|
||||||
|
found = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!found)
|
||||||
|
{
|
||||||
|
elog(ERROR, "failed to identify corresponding split record for %u/%u/%u",
|
||||||
|
node.relNode, leftBlkno, updateBlkno);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -416,7 +424,7 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
|
|||||||
UnlockReleaseBuffer(buffer);
|
UnlockReleaseBuffer(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(record->xl_info & XLR_BKP_BLOCK_2) && data->leftBlkno != InvalidBlockNumber)
|
if (!(record->xl_info & XLR_BKP_BLOCK_3) && data->leftBlkno != InvalidBlockNumber)
|
||||||
{
|
{
|
||||||
buffer = XLogReadBuffer(reln, data->leftBlkno, false);
|
buffer = XLogReadBuffer(reln, data->leftBlkno, false);
|
||||||
page = BufferGetPage(buffer);
|
page = BufferGetPage(buffer);
|
||||||
@ -594,6 +602,7 @@ gin_xlog_cleanup(void)
|
|||||||
|
|
||||||
MemoryContextSwitchTo(topCtx);
|
MemoryContextSwitchTo(topCtx);
|
||||||
MemoryContextDelete(opCtx);
|
MemoryContextDelete(opCtx);
|
||||||
|
incomplete_splits = NIL;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
Loading…
Reference in New Issue
Block a user