mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-12 18:34:36 +08:00
Fix using too many LWLocks bug, reported by Craig Ringer
<craig@postnewspapers.com.au>. It was my mistake, I missed limitation of number of held locks, now GIN doesn't use continiuous locks, but still hold buffers pinned to prevent interference with vacuum's deletion algorithm.
This commit is contained in:
parent
a18d6de197
commit
0861266bab
@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.4.2.1 2007/06/04 15:59:19 teodor Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.4.2.2 2008/04/22 17:53:41 teodor Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@ -27,58 +27,16 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
|
||||
/* page was deleted by concurrent vacuum */
|
||||
return false;
|
||||
|
||||
if ( *off > maxoff || *off == InvalidOffsetNumber )
|
||||
res = -1;
|
||||
else
|
||||
/*
|
||||
* scan page to find equal or first greater value
|
||||
*/
|
||||
|
||||
for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
|
||||
{
|
||||
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
|
||||
|
||||
if ( res == 0 )
|
||||
{
|
||||
/* page isn't changed */
|
||||
return true;
|
||||
}
|
||||
else if ( res > 0 )
|
||||
{
|
||||
/*
|
||||
* some items was added before our position, look further to find
|
||||
* it or first greater
|
||||
*/
|
||||
|
||||
(*off)++;
|
||||
for (; *off <= maxoff; (*off)++)
|
||||
{
|
||||
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
|
||||
|
||||
if (res == 0)
|
||||
return true;
|
||||
|
||||
if (res < 0)
|
||||
{
|
||||
(*off)--;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* some items was deleted before our position, look from begining
|
||||
* to find it or first greater
|
||||
*/
|
||||
|
||||
for(*off = FirstOffsetNumber; *off<= maxoff; (*off)++)
|
||||
{
|
||||
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
|
||||
|
||||
if ( res == 0 )
|
||||
return true;
|
||||
|
||||
if (res < 0)
|
||||
{
|
||||
(*off)--;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (res <= 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
@ -89,119 +47,87 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
|
||||
* Stop* functions unlock buffer (but don't release!)
|
||||
*/
|
||||
static void
|
||||
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall)
|
||||
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
|
||||
{
|
||||
GinBtreeData btreeEntry;
|
||||
GinBtreeStack *stackEntry;
|
||||
Page page;
|
||||
bool needUnlock = TRUE;
|
||||
|
||||
if (entry->master != NULL)
|
||||
{
|
||||
entry->isFinished = entry->master->isFinished;
|
||||
return;
|
||||
}
|
||||
|
||||
if (firstCall)
|
||||
/*
|
||||
* We should find entry, and begin scan of posting tree
|
||||
* or just store posting list in memory
|
||||
*/
|
||||
|
||||
prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
|
||||
btreeEntry.searchMode = TRUE;
|
||||
stackEntry = ginFindLeafPage(&btreeEntry, NULL);
|
||||
page = BufferGetPage(stackEntry->buffer);
|
||||
|
||||
entry->isFinished = TRUE;
|
||||
entry->buffer = InvalidBuffer;
|
||||
entry->offset = InvalidOffsetNumber;
|
||||
entry->list = NULL;
|
||||
entry->nlist = 0;
|
||||
entry->reduceResult = FALSE;
|
||||
entry->predictNumberResult = 0;
|
||||
|
||||
if (btreeEntry.findItem(&btreeEntry, stackEntry))
|
||||
{
|
||||
/*
|
||||
* at first call we should find entry, and begin scan of posting tree
|
||||
* or just store posting list in memory
|
||||
*/
|
||||
GinBtreeData btreeEntry;
|
||||
GinBtreeStack *stackEntry;
|
||||
Page page;
|
||||
bool needUnlock = TRUE;
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
|
||||
|
||||
prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
|
||||
btreeEntry.searchMode = TRUE;
|
||||
stackEntry = ginFindLeafPage(&btreeEntry, NULL);
|
||||
page = BufferGetPage(stackEntry->buffer);
|
||||
|
||||
entry->isFinished = TRUE;
|
||||
entry->buffer = InvalidBuffer;
|
||||
entry->offset = InvalidOffsetNumber;
|
||||
entry->list = NULL;
|
||||
entry->nlist = 0;
|
||||
entry->reduceResult = FALSE;
|
||||
entry->predictNumberResult = 0;
|
||||
|
||||
if (btreeEntry.findItem(&btreeEntry, stackEntry))
|
||||
if (GinIsPostingTree(itup))
|
||||
{
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
|
||||
BlockNumber rootPostingTree = GinGetPostingTree(itup);
|
||||
GinPostingTreeScan *gdi;
|
||||
Page page;
|
||||
|
||||
if (GinIsPostingTree(itup))
|
||||
{
|
||||
BlockNumber rootPostingTree = GinGetPostingTree(itup);
|
||||
GinPostingTreeScan *gdi;
|
||||
Page page;
|
||||
|
||||
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
||||
needUnlock = FALSE;
|
||||
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
|
||||
|
||||
entry->buffer = scanBeginPostingTree(gdi);
|
||||
IncrBufferRefCount(entry->buffer);
|
||||
|
||||
page = BufferGetPage(entry->buffer);
|
||||
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
|
||||
|
||||
freeGinBtreeStack(gdi->stack);
|
||||
pfree(gdi);
|
||||
entry->isFinished = FALSE;
|
||||
}
|
||||
else if (GinGetNPosting(itup) > 0)
|
||||
{
|
||||
entry->nlist = GinGetNPosting(itup);
|
||||
entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
|
||||
memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
|
||||
entry->isFinished = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (needUnlock)
|
||||
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
||||
freeGinBtreeStack(stackEntry);
|
||||
}
|
||||
else if (entry->buffer != InvalidBuffer)
|
||||
{
|
||||
/* we should find place where we was stopped */
|
||||
BlockNumber blkno;
|
||||
Page page;
|
||||
needUnlock = FALSE;
|
||||
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
|
||||
|
||||
LockBuffer(entry->buffer, GIN_SHARE);
|
||||
entry->buffer = scanBeginPostingTree(gdi);
|
||||
/*
|
||||
* We keep buffer pinned because we need to prevent deletition
|
||||
* page during scan. See GIN's vacuum implementation. RefCount
|
||||
* is increased to keep buffer pinned after freeGinBtreeStack() call.
|
||||
*/
|
||||
IncrBufferRefCount(entry->buffer);
|
||||
|
||||
if (!ItemPointerIsValid(&entry->curItem))
|
||||
/* start position */
|
||||
return;
|
||||
Assert(entry->offset != InvalidOffsetNumber);
|
||||
|
||||
page = BufferGetPage(entry->buffer);
|
||||
|
||||
/* try to find curItem in current buffer */
|
||||
if ( findItemInPage(page, &entry->curItem, &entry->offset) )
|
||||
return;
|
||||
|
||||
/* walk to right */
|
||||
while ((blkno = GinPageGetOpaque(page)->rightlink) != InvalidBlockNumber)
|
||||
{
|
||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
|
||||
LockBuffer(entry->buffer, GIN_SHARE);
|
||||
page = BufferGetPage(entry->buffer);
|
||||
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
|
||||
|
||||
entry->offset = InvalidOffsetNumber;
|
||||
if ( findItemInPage(page, &entry->curItem, &entry->offset) )
|
||||
return;
|
||||
/*
|
||||
* Keep page content in memory to prevent durable page locking
|
||||
*/
|
||||
entry->list = (ItemPointerData *) palloc( BLCKSZ );
|
||||
entry->nlist = GinPageGetOpaque(page)->maxoff;
|
||||
memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
|
||||
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
|
||||
|
||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||
freeGinBtreeStack(gdi->stack);
|
||||
pfree(gdi);
|
||||
entry->isFinished = FALSE;
|
||||
}
|
||||
else if (GinGetNPosting(itup) > 0)
|
||||
{
|
||||
entry->nlist = GinGetNPosting(itup);
|
||||
entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
|
||||
memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
|
||||
entry->isFinished = FALSE;
|
||||
}
|
||||
|
||||
/*
|
||||
* curItem and any greated items was deleted by concurrent vacuum,
|
||||
* so we finished scan with currrent entry
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
stopScanEntry(GinScanEntry entry)
|
||||
{
|
||||
if (entry->buffer != InvalidBuffer)
|
||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||
if (needUnlock)
|
||||
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
||||
freeGinBtreeStack(stackEntry);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -209,47 +135,38 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
|
||||
{
|
||||
uint32 i;
|
||||
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
startScanEntry(index, ginstate, key->scanEntry + i, key->firstCall);
|
||||
if (!key->firstCall)
|
||||
return;
|
||||
|
||||
if (key->firstCall)
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
startScanEntry(index, ginstate, key->scanEntry + i);
|
||||
|
||||
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
|
||||
key->isFinished = FALSE;
|
||||
key->firstCall = FALSE;
|
||||
|
||||
if (GinFuzzySearchLimit > 0)
|
||||
{
|
||||
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
|
||||
key->isFinished = FALSE;
|
||||
key->firstCall = FALSE;
|
||||
/*
|
||||
* If all of keys more than treshold we will try to reduce result,
|
||||
* we hope (and only hope, for intersection operation of array our
|
||||
* supposition isn't true), that total result will not more than
|
||||
* minimal predictNumberResult.
|
||||
*/
|
||||
|
||||
if (GinFuzzySearchLimit > 0)
|
||||
{
|
||||
/*
|
||||
* If all of keys more than treshold we will try to reduce result,
|
||||
* we hope (and only hope, for intersection operation of array our
|
||||
* supposition isn't true), that total result will not more than
|
||||
* minimal predictNumberResult.
|
||||
*/
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit)
|
||||
return;
|
||||
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit)
|
||||
return;
|
||||
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit)
|
||||
{
|
||||
key->scanEntry[i].predictNumberResult /= key->nentries;
|
||||
key->scanEntry[i].reduceResult = TRUE;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit)
|
||||
{
|
||||
key->scanEntry[i].predictNumberResult /= key->nentries;
|
||||
key->scanEntry[i].reduceResult = TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
stopScanKey(GinScanKey key)
|
||||
{
|
||||
uint32 i;
|
||||
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
stopScanEntry(key->scanEntry + i);
|
||||
}
|
||||
|
||||
static void
|
||||
startScan(IndexScanDesc scan)
|
||||
{
|
||||
@ -260,44 +177,82 @@ startScan(IndexScanDesc scan)
|
||||
startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
|
||||
}
|
||||
|
||||
static void
|
||||
stopScan(IndexScanDesc scan)
|
||||
{
|
||||
uint32 i;
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
|
||||
for (i = 0; i < so->nkeys; i++)
|
||||
stopScanKey(so->keys + i);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Gets next ItemPointer from PostingTree. Note, that we copy
|
||||
* page into GinScanEntry->list array and unlock page, but keep it pinned
|
||||
* to prevent interference with vacuum
|
||||
*/
|
||||
static void
|
||||
entryGetNextItem(Relation index, GinScanEntry entry)
|
||||
{
|
||||
Page page = BufferGetPage(entry->buffer);
|
||||
Page page;
|
||||
BlockNumber blkno;
|
||||
|
||||
entry->offset++;
|
||||
if (entry->offset <= GinPageGetOpaque(page)->maxoff && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
|
||||
for(;;)
|
||||
{
|
||||
entry->curItem = *(ItemPointerData *) GinDataPageGetItem(page, entry->offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
|
||||
entry->offset++;
|
||||
|
||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||
if (blkno == InvalidBlockNumber)
|
||||
if (entry->offset <= entry->nlist)
|
||||
{
|
||||
ReleaseBuffer(entry->buffer);
|
||||
entry->buffer = InvalidBuffer;
|
||||
entry->isFinished = TRUE;
|
||||
entry->curItem = entry->list[entry->offset - 1];
|
||||
return;
|
||||
}
|
||||
else
|
||||
|
||||
LockBuffer(entry->buffer, GIN_SHARE);
|
||||
page = BufferGetPage(entry->buffer);
|
||||
for(;;)
|
||||
{
|
||||
/*
|
||||
* It's needed to go by right link. During that we should refind
|
||||
* first ItemPointer greater that stored
|
||||
*/
|
||||
|
||||
blkno = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||
if (blkno == InvalidBlockNumber)
|
||||
{
|
||||
ReleaseBuffer(entry->buffer);
|
||||
ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
|
||||
entry->buffer = InvalidBuffer;
|
||||
entry->isFinished = TRUE;
|
||||
return;
|
||||
}
|
||||
|
||||
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
|
||||
LockBuffer(entry->buffer, GIN_SHARE);
|
||||
page = BufferGetPage(entry->buffer);
|
||||
|
||||
entry->offset = InvalidOffsetNumber;
|
||||
entryGetNextItem(index, entry);
|
||||
if (!ItemPointerIsValid(&entry->curItem) || findItemInPage(page, &entry->curItem, &entry->offset))
|
||||
{
|
||||
/*
|
||||
* Found position equal to or greater than stored
|
||||
*/
|
||||
entry->nlist = GinPageGetOpaque(page)->maxoff;
|
||||
memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
|
||||
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
|
||||
|
||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||
|
||||
if ( !ItemPointerIsValid(&entry->curItem) ||
|
||||
compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 )
|
||||
{
|
||||
/*
|
||||
* First pages are deleted or empty, or we found exact position,
|
||||
* so break inner loop and continue outer one.
|
||||
*/
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find greater than entry->curItem position, store it.
|
||||
*/
|
||||
entry->curItem = entry->list[entry->offset - 1];
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -317,7 +272,7 @@ entryGetItem(Relation index, GinScanEntry entry)
|
||||
entry->isFinished = entry->master->isFinished;
|
||||
entry->curItem = entry->master->curItem;
|
||||
}
|
||||
else if (entry->list)
|
||||
else if (!BufferIsValid(entry->buffer))
|
||||
{
|
||||
entry->offset++;
|
||||
if (entry->offset <= entry->nlist)
|
||||
@ -497,8 +452,6 @@ gingetmulti(PG_FUNCTION_ARGS)
|
||||
break;
|
||||
} while (*returned_tids < max_tids);
|
||||
|
||||
stopScan(scan);
|
||||
|
||||
PG_RETURN_BOOL(*returned_tids == max_tids);
|
||||
}
|
||||
|
||||
@ -517,7 +470,6 @@ gingettuple(PG_FUNCTION_ARGS)
|
||||
|
||||
startScan(scan);
|
||||
res = scanGetItem(scan, &scan->xs_ctup.t_self);
|
||||
stopScan(scan);
|
||||
|
||||
PG_RETURN_BOOL(res);
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
* header file for postgres inverted index access method implementation.
|
||||
*
|
||||
* Copyright (c) 2006, PostgreSQL Global Development Group
|
||||
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.9.2.1 2007/11/16 21:50:13 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.9.2.2 2008/04/22 17:53:41 teodor Exp $
|
||||
*--------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@ -356,14 +356,16 @@ typedef struct GinScanEntryData
|
||||
/* entry, got from extractQueryFn */
|
||||
Datum entry;
|
||||
|
||||
/* current ItemPointer to heap, its offset in buffer and buffer */
|
||||
ItemPointerData curItem;
|
||||
OffsetNumber offset;
|
||||
/* Current page in posting tree */
|
||||
Buffer buffer;
|
||||
|
||||
/* in case of Posing list */
|
||||
/* current ItemPointer to heap */
|
||||
ItemPointerData curItem;
|
||||
|
||||
/* used for Posting list and one page in Posting tree */
|
||||
ItemPointerData *list;
|
||||
uint32 nlist;
|
||||
uint32 nlist;
|
||||
OffsetNumber offset;
|
||||
|
||||
bool isFinished;
|
||||
bool reduceResult;
|
||||
|
Loading…
Reference in New Issue
Block a user