Rewrite the key-combination logic in GIN's keyGetItem() and scanGetItem()

routines to make them behave better in the presence of "lossy" index pointers.
The previous coding was outright incorrect for some cases, as recently
reported by Artur Dabrowski: scanGetItem would fail to return index entries in
cases where one index key had multiple exact pointers on the same page as
another key had a lossy pointer.  Also, keyGetItem was extremely inefficient
for cases where a single index key generates multiple "entry" streams, such as
an @@ operator with a multiple-clause tsquery.  The presence of a lossy page
pointer in any one stream defeated its ability to use the opclass
consistentFn, resulting in probing many heap pages that didn't really need to
be visited.  In Artur's example case, a query like
	WHERE tsvector @@ to_tsquery('a & b')
was about 50X slower than the theoretically equivalent
	WHERE tsvector @@ to_tsquery('a') AND tsvector @@ to_tsquery('b')
The way that I chose to fix this was to have GIN call the consistentFn
twice with both TRUE and FALSE values for the in-doubt entry stream,
returning a hit if either call produces TRUE, but not if they both return
FALSE.  The code handles this for the case of a single in-doubt entry stream,
but punts (falling back to the stupid behavior) if there's more than one lossy
reference to the same page.  The idea could be scaled up to deal with multiple
lossy references, but I think that would probably be wasted complexity.  At
least to judge by Artur's example, such cases don't occur often enough to be
worth trying to optimize.

Back-patch to 8.4.  8.3 did not have lossy GIN index pointers, so not
subject to these problems.
This commit is contained in:
Tom Lane 2010-07-31 00:31:04 +00:00
parent 5f3c54a3a6
commit ed34833c57
3 changed files with 226 additions and 144 deletions

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.30 2010/02/26 02:00:33 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.30.4.1 2010/07/31 00:31:04 tgl Exp $
*-------------------------------------------------------------------------
*/
@ -412,7 +412,6 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
for (i = 0; i < key->nentries; i++)
startScanEntry(index, ginstate, key->scanEntry + i);
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
key->isFinished = FALSE;
key->firstCall = FALSE;
@ -461,11 +460,9 @@ entryGetNextItem(Relation index, GinScanEntry entry)
for (;;)
{
entry->offset++;
if (entry->offset <= entry->nlist)
if (entry->offset < entry->nlist)
{
entry->curItem = entry->list[entry->offset - 1];
entry->curItem = entry->list[entry->offset++];
return;
}
@ -484,7 +481,7 @@ entryGetNextItem(Relation index, GinScanEntry entry)
if (blkno == InvalidBlockNumber)
{
ReleaseBuffer(entry->buffer);
ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
ItemPointerSetInvalid(&entry->curItem);
entry->buffer = InvalidBuffer;
entry->isFinished = TRUE;
return;
@ -495,7 +492,8 @@ entryGetNextItem(Relation index, GinScanEntry entry)
page = BufferGetPage(entry->buffer);
entry->offset = InvalidOffsetNumber;
if (!ItemPointerIsValid(&entry->curItem) || findItemInPage(page, &entry->curItem, &entry->offset))
if (!ItemPointerIsValid(&entry->curItem) ||
findItemInPage(page, &entry->curItem, &entry->offset))
{
/*
* Found position equal to or greater than stored
@ -507,7 +505,8 @@ entryGetNextItem(Relation index, GinScanEntry entry)
LockBuffer(entry->buffer, GIN_UNLOCK);
if (!ItemPointerIsValid(&entry->curItem) ||
compareItemPointers(&entry->curItem, entry->list + entry->offset - 1) == 0)
compareItemPointers(&entry->curItem,
entry->list + entry->offset - 1) == 0)
{
/*
* First pages are deleted or empty, or we found exact
@ -532,12 +531,14 @@ entryGetNextItem(Relation index, GinScanEntry entry)
#define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
/*
* Sets entry->curItem to new found heap item pointer for one
* entry of one scan key
* Sets entry->curItem to next heap item pointer for one entry of one scan key,
* or sets entry->isFinished to TRUE if there are no more.
*/
static bool
static void
entryGetItem(Relation index, GinScanEntry entry)
{
Assert(!entry->isFinished);
if (entry->master)
{
entry->isFinished = entry->master->isFinished;
@ -554,7 +555,7 @@ entryGetItem(Relation index, GinScanEntry entry)
if (entry->partialMatchResult == NULL)
{
ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
ItemPointerSetInvalid(&entry->curItem);
tbm_end_iterate(entry->partialMatchIterator);
entry->partialMatchIterator = NULL;
entry->isFinished = TRUE;
@ -600,7 +601,7 @@ entryGetItem(Relation index, GinScanEntry entry)
entry->curItem = entry->list[entry->offset - 1];
else
{
ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
ItemPointerSetInvalid(&entry->curItem);
entry->isFinished = TRUE;
}
}
@ -609,127 +610,175 @@ entryGetItem(Relation index, GinScanEntry entry)
do
{
entryGetNextItem(index, entry);
} while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry));
} while (entry->isFinished == FALSE &&
entry->reduceResult == TRUE &&
dropItem(entry));
}
return entry->isFinished;
}
/*
* Sets key->curItem to new found heap item pointer for one scan key
* Returns isFinished, ie TRUE means we did NOT get a new item pointer!
* Also, *keyrecheck is set true if recheck is needed for this scan key.
* Note: lossy page could be returned after items from the same page.
* Sets key->curItem to next heap item pointer for one scan key, advancing
* past any item pointers <= advancePast.
* Sets key->isFinished to TRUE if there are no more.
*
* On success, key->recheckCurItem is set true iff recheck is needed for this
* item pointer (including the case where the item pointer is a lossy page
* pointer).
*
* Note: lossy page could be returned after single items from the same page.
* This is OK since the results will just be used to build a bitmap; we'll
* set a bitmap entry more than once, but never actually return a row twice.
*/
static bool
static void
keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
GinScanKey key, bool *keyrecheck)
GinScanKey key, ItemPointer advancePast)
{
ItemPointerData myAdvancePast = *advancePast;
uint32 i;
uint32 lossyEntry;
bool haveLossyEntry;
GinScanEntry entry;
bool res;
MemoryContext oldCtx;
if (key->isFinished)
return TRUE;
Assert(!key->isFinished);
do
{
/*
* move forward from previously value and set new curItem, which is
* minimal from entries->curItems. Lossy page is encoded by
* ItemPointer with max value for offset (0xffff), so if there is an
* non-lossy entries on lossy page they will returned too and after
* that the whole page. That's not a problem for resulting tidbitmap.
* Advance any entries that are <= myAdvancePast. In particular,
* since entry->curItem was initialized with ItemPointerSetMin, this
* ensures we fetch the first item for each entry on the first call.
* Then set key->curItem to the minimum of the valid entry curItems.
*
* Note: a lossy-page entry is encoded by a ItemPointer with max value
* for offset (0xffff), so that it will sort after any exact entries
* for the same page. So we'll prefer to return exact pointers not
* lossy pointers, which is good. Also, when we advance past an exact
* entry after processing it, we will not advance past lossy entries
* for the same page in other keys, which is NECESSARY for correct
* results (since we might have additional entries for the same page
* in the first key).
*/
ItemPointerSetMax(&key->curItem);
for (i = 0; i < key->nentries; i++)
{
entry = key->scanEntry + i;
if (key->entryRes[i])
{
/*
* Move forward only entries which was the least on previous
* call, key->entryRes[i] points that current entry was a
* result of loop/call.
*/
if (entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE)
{
if (compareItemPointers(&entry->curItem, &key->curItem) < 0)
key->curItem = entry->curItem;
}
else
key->entryRes[i] = FALSE;
}
else if (entry->isFinished == FALSE)
{
if (compareItemPointers(&entry->curItem, &key->curItem) < 0)
key->curItem = entry->curItem;
}
while (entry->isFinished == FALSE &&
compareItemPointers(&entry->curItem, &myAdvancePast) <= 0)
entryGetItem(index, entry);
if (entry->isFinished == FALSE &&
compareItemPointers(&entry->curItem, &key->curItem) < 0)
key->curItem = entry->curItem;
}
if (ItemPointerIsMax(&key->curItem))
{
/* all entries are finished */
key->isFinished = TRUE;
return TRUE;
return;
}
/*
* Now key->curItem contains closest ItemPointer to previous result.
*
* if key->nentries == 1 then the consistentFn should always succeed,
* but we must call it anyway to find out the recheck status.
* Now key->curItem contains first ItemPointer after previous result.
* Advance myAdvancePast to this value, so that if the consistentFn
* rejects the entry and we loop around again, we will advance to the
* next available item pointer.
*/
myAdvancePast = key->curItem;
/*----------
* entryRes array is used for:
* - as an argument for consistentFn
* - entry->curItem with corresponding key->entryRes[i] == false are
* greater than key->curItem, so next loop/call they should be
* renewed by entryGetItem(). So, we need to set up an array before
* checking of lossy page.
*----------
/*
* Prepare entryRes array to be passed to consistentFn.
*
* If key->nentries == 1 then the consistentFn should always succeed,
* but we must call it anyway to find out the recheck status.
*
* Lossy-page entries pose a problem, since we don't know the correct
* entryRes state to pass to the consistentFn, and we also don't know
* what its combining logic will be (could be AND, OR, or even NOT).
* Our strategy for a single lossy-page entry is to try the
* consistentFn both ways and return a hit if it accepts either one
* (forcing the hit to be marked lossy so it will be rechecked).
*
* This idea could be generalized to more than one lossy-page entry,
* but ideally lossy-page entries should be infrequent so it would
* seldom be the case that we have more than one. If we do find more
* than one, we just punt and return the item as lossy.
*
* Note that only lossy-page entries pointing to the current item's
* page should trigger this processing.
*/
lossyEntry = 0;
haveLossyEntry = false;
for (i = 0; i < key->nentries; i++)
{
entry = key->scanEntry + i;
if (entry->isFinished == FALSE &&
compareItemPointers(&entry->curItem, &key->curItem) == 0)
if (entry->isFinished)
key->entryRes[i] = FALSE;
else if (ItemPointerIsLossyPage(&entry->curItem) &&
GinItemPointerGetBlockNumber(&entry->curItem) ==
GinItemPointerGetBlockNumber(&key->curItem))
{
if (haveLossyEntry)
{
/* Too many lossy entries, punt */
key->recheckCurItem = true;
return;
}
lossyEntry = i;
haveLossyEntry = true;
/* initially assume TRUE */
key->entryRes[i] = TRUE;
}
else if (compareItemPointers(&entry->curItem, &key->curItem) == 0)
key->entryRes[i] = TRUE;
else
key->entryRes[i] = FALSE;
}
oldCtx = MemoryContextSwitchTo(tempCtx);
/*
* Initialize *keyrecheck in case the consistentFn doesn't know it
* Initialize recheckCurItem in case the consistentFn doesn't know it
* should set it. The safe assumption in that case is to force
* recheck.
*/
*keyrecheck = true;
key->recheckCurItem = true;
/*
* If one of the entry's scans returns lossy result, return it without
* further checking - we can't call consistentFn for lack of data.
*/
if (ItemPointerIsLossyPage(&key->curItem))
return FALSE;
oldCtx = MemoryContextSwitchTo(tempCtx);
res = DatumGetBool(FunctionCall6(&ginstate->consistentFn[key->attnum - 1],
PointerGetDatum(key->entryRes),
UInt16GetDatum(key->strategy),
key->query,
UInt32GetDatum(key->nentries),
PointerGetDatum(key->extra_data),
PointerGetDatum(keyrecheck)));
PointerGetDatum(&key->recheckCurItem)));
if (!res && haveLossyEntry)
{
/* try the other way for the lossy item */
key->entryRes[lossyEntry] = FALSE;
key->recheckCurItem = true;
res = DatumGetBool(FunctionCall6(&ginstate->consistentFn[key->attnum - 1],
PointerGetDatum(key->entryRes),
UInt16GetDatum(key->strategy),
key->query,
UInt32GetDatum(key->nentries),
PointerGetDatum(key->extra_data),
PointerGetDatum(&key->recheckCurItem)));
}
MemoryContextSwitchTo(oldCtx);
MemoryContextReset(tempCtx);
} while (!res);
return FALSE;
/* If we matched a lossy entry, force recheckCurItem = true */
if (haveLossyEntry)
key->recheckCurItem = true;
} while (!res);
}
@ -904,7 +953,7 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
j;
/*
* Resets entryRes
* Reset entryRes
*/
for (i = 0; i < so->nkeys; i++)
{
@ -1145,75 +1194,103 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
}
/*
* Get heap item pointer from scan
* returns true if found
* Get next heap item pointer (after advancePast) from scan.
* Returns true if anything found.
* On success, *item and *recheck are set.
*
* Note: this is very nearly the same logic as in keyGetItem(), except
* that we know the keys are to be combined with AND logic, whereas in
* keyGetItem() the combination logic is known only to the consistentFn.
*/
static bool
scanGetItem(IndexScanDesc scan, ItemPointerData *item, bool *recheck)
scanGetItem(IndexScanDesc scan, ItemPointer advancePast,
ItemPointerData *item, bool *recheck)
{
GinScanOpaque so = (GinScanOpaque) scan->opaque;
ItemPointerData myAdvancePast = *advancePast;
uint32 i;
bool keyrecheck;
bool match;
for (;;)
{
/*
* Advance any keys that are <= myAdvancePast. In particular,
* since key->curItem was initialized with ItemPointerSetMin, this
* ensures we fetch the first item for each key on the first call.
* Then set *item to the minimum of the key curItems.
*
* Note: a lossy-page entry is encoded by a ItemPointer with max value
* for offset (0xffff), so that it will sort after any exact entries
* for the same page. So we'll prefer to return exact pointers not
* lossy pointers, which is good. Also, when we advance past an exact
* entry after processing it, we will not advance past lossy entries
* for the same page in other keys, which is NECESSARY for correct
* results (since we might have additional entries for the same page
* in the first key).
*/
ItemPointerSetMax(item);
for (i = 0; i < so->nkeys; i++)
{
GinScanKey key = so->keys + i;
while (key->isFinished == FALSE &&
compareItemPointers(&key->curItem, &myAdvancePast) <= 0)
keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx,
key, &myAdvancePast);
if (key->isFinished)
return FALSE; /* finished one of keys */
if (compareItemPointers(&key->curItem, item) < 0)
*item = key->curItem;
}
Assert(!ItemPointerIsMax(item));
/*
* Now *item contains first ItemPointer after previous result.
*
* The item is a valid hit only if all the keys returned either
* that exact TID, or a lossy reference to the same page.
*/
match = true;
for (i = 0; i < so->nkeys; i++)
{
GinScanKey key = so->keys + i;
if (compareItemPointers(item, &key->curItem) == 0)
continue;
if (ItemPointerIsLossyPage(&key->curItem) &&
GinItemPointerGetBlockNumber(&key->curItem) ==
GinItemPointerGetBlockNumber(item))
continue;
match = false;
break;
}
if (match)
break;
/*
* No hit. Update myAdvancePast to this TID, so that on the next
* pass we'll move to the next possible entry.
*/
myAdvancePast = *item;
}
/*
* We return recheck = true if any of the keyGetItem calls return
* keyrecheck = true. Note that because the second loop might advance
* some keys, this could theoretically be too conservative. In practice
* though, we expect that a consistentFn's recheck result will depend only
* on the operator and the query, so for any one key it should stay the
* same regardless of advancing to new items. So it's not worth working
* harder.
* We must return recheck = true if any of the keys are marked recheck.
*/
*recheck = false;
ItemPointerSetMin(item);
for (i = 0; i < so->nkeys; i++)
{
GinScanKey key = so->keys + i;
if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx,
key, &keyrecheck))
return FALSE; /* finished one of keys */
if (compareItemPointers(item, &key->curItem) < 0)
*item = key->curItem;
*recheck |= keyrecheck;
}
for (i = 1; i <= so->nkeys; i++)
{
GinScanKey key = so->keys + i - 1;
for (;;)
if (key->recheckCurItem)
{
int cmp = compareItemPointers(item, &key->curItem);
if (cmp != 0 && (ItemPointerIsLossyPage(item) || ItemPointerIsLossyPage(&key->curItem)))
{
/*
* if one of ItemPointers points to the whole page then
* compare only page's number
*/
if (ItemPointerGetBlockNumber(item) == ItemPointerGetBlockNumber(&key->curItem))
cmp = 0;
else
cmp = (ItemPointerGetBlockNumber(item) > ItemPointerGetBlockNumber(&key->curItem)) ? 1 : -1;
}
if (cmp == 0)
break;
else if (cmp > 0)
{
if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx,
key, &keyrecheck))
return FALSE; /* finished one of keys */
*recheck |= keyrecheck;
}
else
{ /* returns to begin */
*item = key->curItem;
i = 0;
break;
}
*recheck = true;
break;
}
}
@ -1229,6 +1306,8 @@ gingetbitmap(PG_FUNCTION_ARGS)
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
int64 ntids;
ItemPointerData iptr;
bool recheck;
if (GinIsNewKey(scan))
newScanKey(scan);
@ -1255,14 +1334,13 @@ gingetbitmap(PG_FUNCTION_ARGS)
*/
startScan(scan);
ItemPointerSetMin(&iptr);
for (;;)
{
ItemPointerData iptr;
bool recheck;
CHECK_FOR_INTERRUPTS();
if (!scanGetItem(scan, &iptr, &recheck))
if (!scanGetItem(scan, &iptr, &iptr, &recheck))
break;
if (ItemPointerIsLossyPage(&iptr))

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.26 2010/01/18 11:50:43 teodor Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.26.6.1 2010/07/31 00:31:04 tgl Exp $
*-------------------------------------------------------------------------
*/
@ -51,7 +51,7 @@ fillScanKey(GinState *ginstate, GinScanKey key, OffsetNumber attnum, Datum query
key->extra_data = extra_data;
key->query = query;
key->firstCall = TRUE;
ItemPointerSet(&(key->curItem), InvalidBlockNumber, InvalidOffsetNumber);
ItemPointerSetMin(&key->curItem);
for (i = 0; i < nEntryValues; i++)
{
@ -59,7 +59,8 @@ fillScanKey(GinState *ginstate, GinScanKey key, OffsetNumber attnum, Datum query
key->scanEntry[i].entry = entryValues[i];
key->scanEntry[i].attnum = attnum;
key->scanEntry[i].extra_data = (extra_data) ? extra_data[i] : NULL;
ItemPointerSet(&(key->scanEntry[i].curItem), InvalidBlockNumber, InvalidOffsetNumber);
ItemPointerSetMin(&key->scanEntry[i].curItem);
key->scanEntry[i].isFinished = FALSE;
key->scanEntry[i].offset = InvalidOffsetNumber;
key->scanEntry[i].buffer = InvalidBuffer;
key->scanEntry[i].partialMatch = NULL;
@ -100,14 +101,15 @@ resetScanKeys(GinScanKey keys, uint32 nkeys)
GinScanKey key = keys + i;
key->firstCall = TRUE;
ItemPointerSet(&(key->curItem), InvalidBlockNumber, InvalidOffsetNumber);
ItemPointerSetMin(&key->curItem);
for (j = 0; j < key->nentries; j++)
{
if (key->scanEntry[j].buffer != InvalidBuffer)
ReleaseBuffer(key->scanEntry[i].buffer);
ItemPointerSet(&(key->scanEntry[j].curItem), InvalidBlockNumber, InvalidOffsetNumber);
ItemPointerSetMin(&key->scanEntry[j].curItem);
key->scanEntry[j].isFinished = FALSE;
key->scanEntry[j].offset = InvalidOffsetNumber;
key->scanEntry[j].buffer = InvalidBuffer;
key->scanEntry[j].list = NULL;

View File

@ -4,7 +4,7 @@
*
* Copyright (c) 2006-2010, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.38 2010/02/26 02:01:20 momjian Exp $
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.38.4.1 2010/07/31 00:31:04 tgl Exp $
*--------------------------------------------------------------------------
*/
#ifndef GIN_H
@ -520,6 +520,8 @@ typedef struct GinScanKeyData
OffsetNumber attnum;
ItemPointerData curItem;
bool recheckCurItem;
bool firstCall;
bool isFinished;
} GinScanKeyData;