diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index 9c9712b446..64098d7040 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.5 2006/11/12 06:55:53 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.5.2.1 2007/06/04 15:59:19 teodor Exp $ *------------------------------------------------------------------------- */ @@ -358,6 +358,7 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda static XLogRecData rdata[3]; int sizeofitem = GinSizeOfItem(page); static ginxlogInsert data; + int cnt=0; *prdata = rdata; Assert(GinPageIsData(page)); @@ -372,21 +373,33 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda data.isData = TRUE; data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE; - rdata[0].buffer = buf; - rdata[0].buffer_std = FALSE; - rdata[0].data = NULL; - rdata[0].len = 0; - rdata[0].next = &rdata[1]; + /* + * Prevent full page write if child's split occurs. That is needed + * to remove incomplete splits while replaying WAL + * + * data.updateBlkno contains new block number (of newly created right page) + * for recently splited page. + */ + if ( data.updateBlkno == InvalidBlockNumber ) + { + rdata[0].buffer = buf; + rdata[0].buffer_std = FALSE; + rdata[0].data = NULL; + rdata[0].len = 0; + rdata[0].next = &rdata[1]; + cnt++; + } - rdata[1].buffer = InvalidBuffer; - rdata[1].data = (char *) &data; - rdata[1].len = sizeof(ginxlogInsert); - rdata[1].next = &rdata[2]; + rdata[cnt].buffer = InvalidBuffer; + rdata[cnt].data = (char *) &data; + rdata[cnt].len = sizeof(ginxlogInsert); + rdata[cnt].next = &rdata[cnt+1]; + cnt++; - rdata[2].buffer = InvalidBuffer; - rdata[2].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem)); - rdata[2].len = sizeofitem; - rdata[2].next = NULL; + rdata[cnt].buffer = InvalidBuffer; + rdata[cnt].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem)); + rdata[cnt].len = sizeofitem; + rdata[cnt].next = NULL; if (GinPageIsLeaf(page)) { @@ -402,7 +415,7 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda btree->curitem++; } data.nitem = btree->curitem - savedPos; - rdata[2].len = sizeofitem * data.nitem; + rdata[cnt].len = sizeofitem * data.nitem; } else { diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index 5dc3ac57b3..ca40966603 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.5 2006/11/12 06:55:53 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.5.2.1 2007/06/04 15:59:19 teodor Exp $ *------------------------------------------------------------------------- */ @@ -354,6 +354,7 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prd static XLogRecData rdata[3]; OffsetNumber placed; static ginxlogInsert data; + int cnt=0; *prdata = rdata; data.updateBlkno = entryPreparePage(btree, page, off); @@ -371,21 +372,33 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prd data.isData = false; data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE; - rdata[0].buffer = buf; - rdata[0].buffer_std = TRUE; - rdata[0].data = NULL; - rdata[0].len = 0; - rdata[0].next = &rdata[1]; + /* + * Prevent full page write if child's split occurs. That is needed + * to remove incomplete splits while replaying WAL + * + * data.updateBlkno contains new block number (of newly created right page) + * for recently splited page. + */ + if ( data.updateBlkno == InvalidBlockNumber ) + { + rdata[0].buffer = buf; + rdata[0].buffer_std = TRUE; + rdata[0].data = NULL; + rdata[0].len = 0; + rdata[0].next = &rdata[1]; + cnt++; + } - rdata[1].buffer = InvalidBuffer; - rdata[1].data = (char *) &data; - rdata[1].len = sizeof(ginxlogInsert); - rdata[1].next = &rdata[2]; + rdata[cnt].buffer = InvalidBuffer; + rdata[cnt].data = (char *) &data; + rdata[cnt].len = sizeof(ginxlogInsert); + rdata[cnt].next = &rdata[cnt+1]; + cnt++; - rdata[2].buffer = InvalidBuffer; - rdata[2].data = (char *) btree->entry; - rdata[2].len = IndexTupleSize(btree->entry); - rdata[2].next = NULL; + rdata[cnt].buffer = InvalidBuffer; + rdata[cnt].data = (char *) btree->entry; + rdata[cnt].len = IndexTupleSize(btree->entry); + rdata[cnt].next = NULL; btree->entry = NULL; } diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 09b78ac118..e3a8d26976 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.4 2006/11/12 06:55:53 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.4.2.1 2007/06/04 15:59:19 teodor Exp $ *------------------------------------------------------------------------- */ @@ -17,22 +17,71 @@ #include "catalog/index.h" #include "utils/memutils.h" -static OffsetNumber -findItemInPage(Page page, ItemPointer item, OffsetNumber off) +static bool +findItemInPage(Page page, ItemPointer item, OffsetNumber *off) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; int res; - for (; off <= maxoff; off++) - { - res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, off)); - Assert(res >= 0); + if ( GinPageGetOpaque(page)->flags & GIN_DELETED ) + /* page was deleted by concurrent vacuum */ + return false; - if (res == 0) - return off; + if ( *off > maxoff || *off == InvalidOffsetNumber ) + res = -1; + else + res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); + + if ( res == 0 ) + { + /* page isn't changed */ + return true; + } + else if ( res > 0 ) + { + /* + * some items was added before our position, look further to find + * it or first greater + */ + + (*off)++; + for (; *off <= maxoff; (*off)++) + { + res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); + + if (res == 0) + return true; + + if (res < 0) + { + (*off)--; + return true; + } + } + } + else + { + /* + * some items was deleted before our position, look from begining + * to find it or first greater + */ + + for(*off = FirstOffsetNumber; *off<= maxoff; (*off)++) + { + res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); + + if ( res == 0 ) + return true; + + if (res < 0) + { + (*off)--; + return true; + } + } } - return InvalidOffsetNumber; + return false; } /* @@ -111,7 +160,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firs } else if (entry->buffer != InvalidBuffer) { - /* we should find place were we was stopped */ + /* we should find place where we was stopped */ BlockNumber blkno; Page page; @@ -125,7 +174,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firs page = BufferGetPage(entry->buffer); /* try to find curItem in current buffer */ - if ((entry->offset = findItemInPage(page, &entry->curItem, entry->offset)) != InvalidOffsetNumber) + if ( findItemInPage(page, &entry->curItem, &entry->offset) ) return; /* walk to right */ @@ -136,11 +185,15 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firs LockBuffer(entry->buffer, GIN_SHARE); page = BufferGetPage(entry->buffer); - if ((entry->offset = findItemInPage(page, &entry->curItem, FirstOffsetNumber)) != InvalidOffsetNumber) + entry->offset = InvalidOffsetNumber; + if ( findItemInPage(page, &entry->curItem, &entry->offset) ) return; } - elog(ERROR, "Logic error: lost previously founded ItemId"); + /* + * curItem and any greated items was deleted by concurrent vacuum, + * so we finished scan with currrent entry + */ } } diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 225dbc6047..cf643ac9c8 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.9 2006/11/30 16:22:32 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.9.2.1 2007/06/04 15:59:19 teodor Exp $ *------------------------------------------------------------------------- */ @@ -159,14 +159,14 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, /* * We should be sure that we don't concurrent with inserts, insert process * never release root page until end (but it can unlock it and lock - * again). If we lock root with with LockBufferForCleanup, new scan - * process can't begin, but previous may run. ginmarkpos/start* keeps - * buffer pinned, so we will wait for it. We lock only one posting tree in - * whole index, so, it's concurrent enough.. Side effect: after this is - * full complete, tree is unused by any other process + * again). New scan can't start but previously started + * ones work concurrently. */ - LockBufferForCleanup(buffer); + if ( isRoot ) + LockBufferForCleanup(buffer); + else + LockBuffer(buffer, GIN_EXCLUSIVE); Assert(GinPageIsData(page)); @@ -248,6 +248,8 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn if (!isParentRoot) /* parent is already locked by * LockBufferForCleanup() */ LockBuffer(pBuffer, GIN_EXCLUSIVE); + if (leftBlkno != InvalidBlockNumber) + LockBuffer(lBuffer, GIN_EXCLUSIVE); START_CRIT_SECTION(); @@ -255,8 +257,6 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn { BlockNumber rightlink; - LockBuffer(lBuffer, GIN_EXCLUSIVE); - page = BufferGetPage(dBuffer); rightlink = GinPageGetOpaque(page)->rightlink; @@ -274,6 +274,10 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn PageDeletePostingItem(parentPage, myoff); page = BufferGetPage(dBuffer); + /* + * we shouldn't change rightlink field to save + * workability of running search scan + */ GinPageGetOpaque(page)->flags = GIN_DELETED; if (!gvs->index->rd_istemp) diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index 788f290b84..36cefa22ab 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.5 2006/10/04 00:29:48 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.5.2.1 2007/06/04 15:59:20 teodor Exp $ *------------------------------------------------------------------------- */ #include "postgres.h" @@ -53,6 +53,7 @@ static void forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updateBlkno) { ListCell *l; + bool found = false; foreach(l, incomplete_splits) { @@ -61,9 +62,16 @@ forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updat if (RelFileNodeEquals(node, split->node) && leftBlkno == split->leftBlkno && updateBlkno == split->rightBlkno) { incomplete_splits = list_delete_ptr(incomplete_splits, split); + found = true; break; } } + + if (!found) + { + elog(ERROR, "failed to identify corresponding split record for %u/%u/%u", + node.relNode, leftBlkno, updateBlkno); + } } static void @@ -416,7 +424,7 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) UnlockReleaseBuffer(buffer); } - if (!(record->xl_info & XLR_BKP_BLOCK_2) && data->leftBlkno != InvalidBlockNumber) + if (!(record->xl_info & XLR_BKP_BLOCK_3) && data->leftBlkno != InvalidBlockNumber) { buffer = XLogReadBuffer(reln, data->leftBlkno, false); page = BufferGetPage(buffer); @@ -594,6 +602,7 @@ gin_xlog_cleanup(void) MemoryContextSwitchTo(topCtx); MemoryContextDelete(opCtx); + incomplete_splits = NIL; } bool