Bitmap Table Scans use unified TBMIterator

With the repurposing of TBMIterator as an interface for both parallel
and serial iteration through TIDBitmaps in commit 7f9d4187e7,
bitmap table scans may now use it.

Modify bitmap table scan code to use the TBMIterator. This requires
moving around a bit of code, so a few variables are initialized
elsewhere.

Author: Melanie Plageman
Reviewed-by: Tomas Vondra
Discussion: https://postgr.es/m/c736f6aa-8b35-4e20-9621-62c7c82e2168%40vondra.me
This commit is contained in:
Melanie Plageman 2024-12-18 18:43:39 -05:00
parent 7f9d4187e7
commit 1a0da347a7
6 changed files with 86 additions and 123 deletions

View File

@ -2135,10 +2135,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
{
CHECK_FOR_INTERRUPTS();
if (scan->st.bitmap.rs_shared_iterator)
tbmres = tbm_shared_iterate(scan->st.bitmap.rs_shared_iterator);
else
tbmres = tbm_private_iterate(scan->st.bitmap.rs_iterator);
tbmres = tbm_iterate(&scan->st.rs_tbmiterator);
if (tbmres == NULL)
return false;

View File

@ -95,8 +95,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
*/
if (!node->initialized)
{
TBMPrivateIterator *tbmiterator = NULL;
TBMSharedIterator *shared_tbmiterator = NULL;
TBMIterator tbmiterator;
if (!pstate)
{
@ -106,70 +105,62 @@ BitmapHeapNext(BitmapHeapScanState *node)
elog(ERROR, "unrecognized result from subplan");
node->tbm = tbm;
tbmiterator = tbm_begin_private_iterate(tbm);
#ifdef USE_PREFETCH
if (node->prefetch_maximum > 0)
{
node->prefetch_iterator = tbm_begin_private_iterate(tbm);
node->prefetch_pages = 0;
node->prefetch_target = -1;
}
node->prefetch_iterator =
tbm_begin_iterate(node->tbm, dsa,
pstate ?
pstate->prefetch_iterator :
InvalidDsaPointer);
#endif /* USE_PREFETCH */
}
else
else if (BitmapShouldInitializeSharedState(pstate))
{
/*
* The leader will immediately come out of the function, but
* others will be blocked until leader populates the TBM and wakes
* them up.
*/
if (BitmapShouldInitializeSharedState(pstate))
{
tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
if (!tbm || !IsA(tbm, TIDBitmap))
elog(ERROR, "unrecognized result from subplan");
tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
if (!tbm || !IsA(tbm, TIDBitmap))
elog(ERROR, "unrecognized result from subplan");
node->tbm = tbm;
node->tbm = tbm;
/*
* Prepare to iterate over the TBM. This will return the
* dsa_pointer of the iterator state which will be used by
* multiple processes to iterate jointly.
*/
pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
#ifdef USE_PREFETCH
if (node->prefetch_maximum > 0)
{
pstate->prefetch_iterator =
tbm_prepare_shared_iterate(tbm);
/*
* We don't need the mutex here as we haven't yet woke up
* others.
*/
pstate->prefetch_pages = 0;
pstate->prefetch_target = -1;
}
#endif
/* We have initialized the shared state so wake up others. */
BitmapDoneInitializingSharedState(pstate);
}
/* Allocate a private iterator and attach the shared state to it */
shared_tbmiterator =
tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
/*
* Prepare to iterate over the TBM. This will return the
* dsa_pointer of the iterator state which will be used by
* multiple processes to iterate jointly.
*/
pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
#ifdef USE_PREFETCH
if (node->prefetch_maximum > 0)
{
node->shared_prefetch_iterator =
tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
pstate->prefetch_iterator =
tbm_prepare_shared_iterate(tbm);
}
#endif /* USE_PREFETCH */
/* We have initialized the shared state so wake up others. */
BitmapDoneInitializingSharedState(pstate);
}
tbmiterator = tbm_begin_iterate(tbm, dsa,
pstate ?
pstate->tbmiterator :
InvalidDsaPointer);
#ifdef USE_PREFETCH
if (node->prefetch_maximum > 0)
node->prefetch_iterator =
tbm_begin_iterate(tbm, dsa,
pstate ?
pstate->prefetch_iterator :
InvalidDsaPointer);
#endif /* USE_PREFETCH */
/*
* If this is the first scan of the underlying table, create the table
* scan descriptor and begin the scan.
@ -198,8 +189,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
node->ss.ss_currentScanDesc = scan;
}
scan->st.bitmap.rs_iterator = tbmiterator;
scan->st.bitmap.rs_shared_iterator = shared_tbmiterator;
scan->st.rs_tbmiterator = tbmiterator;
node->initialized = true;
goto new_page;
@ -285,7 +275,7 @@ new_page:
* ahead of the current block.
*/
if (node->pstate == NULL &&
node->prefetch_iterator &&
!tbm_exhausted(&node->prefetch_iterator) &&
node->prefetch_blockno < node->blockno)
elog(ERROR,
"prefetch and main iterators are out of sync. pfblockno: %d. blockno: %d",
@ -332,16 +322,16 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
if (pstate == NULL)
{
TBMPrivateIterator *prefetch_iterator = node->prefetch_iterator;
TBMIterator *prefetch_iterator = &node->prefetch_iterator;
if (node->prefetch_pages > 0)
{
/* The main iterator has closed the distance by one page */
node->prefetch_pages--;
}
else if (prefetch_iterator)
else if (!tbm_exhausted(prefetch_iterator))
{
tbmpre = tbm_private_iterate(prefetch_iterator);
tbmpre = tbm_iterate(prefetch_iterator);
node->prefetch_blockno = tbmpre ? tbmpre->blockno :
InvalidBlockNumber;
}
@ -359,7 +349,7 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
*/
if (node->prefetch_maximum > 0)
{
TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
TBMIterator *prefetch_iterator = &node->prefetch_iterator;
SpinLockAcquire(&pstate->mutex);
if (pstate->prefetch_pages > 0)
@ -380,9 +370,9 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
* we don't validate the blockno here as we do in non-parallel
* case.
*/
if (prefetch_iterator)
if (!tbm_exhausted(prefetch_iterator))
{
tbmpre = tbm_shared_iterate(prefetch_iterator);
tbmpre = tbm_iterate(prefetch_iterator);
node->prefetch_blockno = tbmpre ? tbmpre->blockno :
InvalidBlockNumber;
}
@ -446,21 +436,19 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
if (pstate == NULL)
{
TBMPrivateIterator *prefetch_iterator = node->prefetch_iterator;
TBMIterator *prefetch_iterator = &node->prefetch_iterator;
if (prefetch_iterator)
if (!tbm_exhausted(prefetch_iterator))
{
while (node->prefetch_pages < node->prefetch_target)
{
TBMIterateResult *tbmpre;
TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
bool skip_fetch;
tbmpre = tbm_private_iterate(prefetch_iterator);
if (tbmpre == NULL)
{
/* No more pages to prefetch */
tbm_end_private_iterate(prefetch_iterator);
node->prefetch_iterator = NULL;
tbm_end_iterate(prefetch_iterator);
break;
}
node->prefetch_pages++;
@ -488,9 +476,9 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
if (pstate->prefetch_pages < pstate->prefetch_target)
{
TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
TBMIterator *prefetch_iterator = &node->prefetch_iterator;
if (prefetch_iterator)
if (!tbm_exhausted(prefetch_iterator))
{
while (1)
{
@ -513,12 +501,11 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
if (!do_prefetch)
return;
tbmpre = tbm_shared_iterate(prefetch_iterator);
tbmpre = tbm_iterate(prefetch_iterator);
if (tbmpre == NULL)
{
/* No more pages to prefetch */
tbm_end_shared_iterate(prefetch_iterator);
node->shared_prefetch_iterator = NULL;
tbm_end_iterate(prefetch_iterator);
break;
}
@ -587,39 +574,30 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
/*
* End iteration on iterators saved in scan descriptor.
*/
if (scan->st.bitmap.rs_shared_iterator)
{
tbm_end_shared_iterate(scan->st.bitmap.rs_shared_iterator);
scan->st.bitmap.rs_shared_iterator = NULL;
}
if (scan->st.bitmap.rs_iterator)
{
tbm_end_private_iterate(scan->st.bitmap.rs_iterator);
scan->st.bitmap.rs_iterator = NULL;
}
tbm_end_iterate(&scan->st.rs_tbmiterator);
/* rescan to release any page pin */
table_rescan(node->ss.ss_currentScanDesc, NULL);
}
/* If we did not already clean up the prefetch iterator, do so now. */
if (!tbm_exhausted(&node->prefetch_iterator))
tbm_end_iterate(&node->prefetch_iterator);
/* release bitmaps and buffers if any */
if (node->prefetch_iterator)
tbm_end_private_iterate(node->prefetch_iterator);
if (node->shared_prefetch_iterator)
tbm_end_shared_iterate(node->shared_prefetch_iterator);
if (node->tbm)
tbm_free(node->tbm);
if (node->pvmbuffer != InvalidBuffer)
ReleaseBuffer(node->pvmbuffer);
node->tbm = NULL;
node->prefetch_iterator = NULL;
node->initialized = false;
node->shared_prefetch_iterator = NULL;
node->pvmbuffer = InvalidBuffer;
node->recheck = true;
/* Only used for serial BHS */
node->blockno = InvalidBlockNumber;
node->prefetch_blockno = InvalidBlockNumber;
node->prefetch_pages = 0;
node->prefetch_target = -1;
ExecScanReScan(&node->ss);
@ -678,17 +656,7 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
/*
* End iteration on iterators saved in scan descriptor.
*/
if (scanDesc->st.bitmap.rs_shared_iterator)
{
tbm_end_shared_iterate(scanDesc->st.bitmap.rs_shared_iterator);
scanDesc->st.bitmap.rs_shared_iterator = NULL;
}
if (scanDesc->st.bitmap.rs_iterator)
{
tbm_end_private_iterate(scanDesc->st.bitmap.rs_iterator);
scanDesc->st.bitmap.rs_iterator = NULL;
}
tbm_end_iterate(&scanDesc->st.rs_tbmiterator);
/*
* close table scan
@ -696,15 +664,15 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
table_endscan(scanDesc);
}
/* If we did not already clean up the prefetch iterator, do so now. */
if (!tbm_exhausted(&node->prefetch_iterator))
tbm_end_iterate(&node->prefetch_iterator);
/*
* release bitmaps and buffers if any
*/
if (node->prefetch_iterator)
tbm_end_private_iterate(node->prefetch_iterator);
if (node->tbm)
tbm_free(node->tbm);
if (node->shared_prefetch_iterator)
tbm_end_shared_iterate(node->shared_prefetch_iterator);
if (node->pvmbuffer != InvalidBuffer)
ReleaseBuffer(node->pvmbuffer);
}
@ -744,11 +712,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
/* Zero the statistics counters */
memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
scanstate->prefetch_iterator = NULL;
scanstate->prefetch_pages = 0;
scanstate->prefetch_target = 0;
scanstate->prefetch_target = -1;
scanstate->initialized = false;
scanstate->shared_prefetch_iterator = NULL;
scanstate->pstate = NULL;
scanstate->recheck = true;
scanstate->blockno = InvalidBlockNumber;
@ -907,7 +873,7 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
/* Initialize the mutex */
SpinLockInit(&pstate->mutex);
pstate->prefetch_pages = 0;
pstate->prefetch_target = 0;
pstate->prefetch_target = -1;
pstate->state = BM_INITIAL;
ConditionVariableInit(&pstate->cv);
@ -944,6 +910,8 @@ ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
return;
pstate->state = BM_INITIAL;
pstate->prefetch_pages = 0;
pstate->prefetch_target = -1;
if (DsaPointerIsValid(pstate->tbmiterator))
tbm_free_shared_area(dsa, pstate->tbmiterator);

View File

@ -16,6 +16,7 @@
#include "access/htup_details.h"
#include "access/itup.h"
#include "nodes/tidbitmap.h"
#include "port/atomics.h"
#include "storage/buf.h"
#include "storage/relfilelocator.h"
@ -25,9 +26,6 @@
struct ParallelTableScanDescData;
struct TBMPrivateIterator;
struct TBMSharedIterator;
/*
* Generic descriptor for table scans. This is the base-class for table scans,
* which needs to be embedded in the scans of individual AMs.
@ -45,12 +43,8 @@ typedef struct TableScanDescData
*/
union
{
/* Iterators for Bitmap Table Scans */
struct
{
struct TBMPrivateIterator *rs_iterator;
struct TBMSharedIterator *rs_shared_iterator;
} bitmap;
/* Iterator for Bitmap Table Scans */
TBMIterator rs_tbmiterator;
/*
* Range of ItemPointers for table_scan_getnextslot_tidrange() to

View File

@ -957,17 +957,13 @@ static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot,
int nkeys, struct ScanKeyData *key, bool need_tuple)
{
TableScanDesc result;
uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
if (need_tuple)
flags |= SO_NEED_TUPLES;
result = rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
NULL, flags);
result->st.bitmap.rs_shared_iterator = NULL;
result->st.bitmap.rs_iterator = NULL;
return result;
return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
NULL, flags);
}
/*

View File

@ -1838,7 +1838,6 @@ typedef struct SharedBitmapHeapInstrumentation
* prefetch_target current target prefetch distance
* prefetch_maximum maximum value for prefetch_target
* initialized is node is ready to iterate
* shared_prefetch_iterator shared iterator for prefetching
* pstate shared state for parallel bitmap scan
* sinstrument statistics for parallel workers
* recheck do current page's tuples need recheck
@ -1853,12 +1852,11 @@ typedef struct BitmapHeapScanState
TIDBitmap *tbm;
Buffer pvmbuffer;
BitmapHeapScanInstrumentation stats;
TBMPrivateIterator *prefetch_iterator;
TBMIterator prefetch_iterator;
int prefetch_pages;
int prefetch_target;
int prefetch_maximum;
bool initialized;
TBMSharedIterator *shared_prefetch_iterator;
ParallelBitmapHeapState *pstate;
SharedBitmapHeapInstrumentation *sinstrument;
bool recheck;

View File

@ -92,4 +92,14 @@ extern void tbm_end_iterate(TBMIterator *iterator);
extern TBMIterateResult *tbm_iterate(TBMIterator *iterator);
static inline bool
tbm_exhausted(TBMIterator *iterator)
{
/*
* It doesn't matter if we check the private or shared iterator here. If
* tbm_end_iterate() was called, they will be NULL
*/
return !iterator->i.private_iterator;
}
#endif /* TIDBITMAP_H */