mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-02-17 19:30:00 +08:00
tableam: Avoid relying on relation size to determine validity of tids.
Instead add a tableam callback to do so. To avoid adding per validation overhead, pass a scan to tuple_tid_valid. In heap's case we'd otherwise incurred a RelationGetNumberOfBlocks() call for each tid - which'd have added noticable overhead to nodeTidscan.c. Author: Andres Freund Reviewed-By: Ashwin Agrawal Discussion: https://postgr.es/m/20190515185447.gno2jtqxyktylyvs@alap3.anarazel.de
This commit is contained in:
parent
7f44ede594
commit
147e3722f7
@ -1654,8 +1654,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
|
||||
/*
|
||||
* heap_get_latest_tid - get the latest tid of a specified tuple
|
||||
*
|
||||
* Actually, this gets the latest version that is visible according to
|
||||
* the passed snapshot. You can pass SnapshotDirty to get the very latest,
|
||||
* Actually, this gets the latest version that is visible according to the
|
||||
* scan's snapshot. Create a scan using SnapshotDirty to get the very latest,
|
||||
* possibly uncommitted version.
|
||||
*
|
||||
* *tid is both an input and an output parameter: it is updated to
|
||||
@ -1663,28 +1663,20 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
|
||||
* if no version of the row passes the snapshot test.
|
||||
*/
|
||||
void
|
||||
heap_get_latest_tid(Relation relation,
|
||||
Snapshot snapshot,
|
||||
heap_get_latest_tid(TableScanDesc sscan,
|
||||
ItemPointer tid)
|
||||
{
|
||||
BlockNumber blk;
|
||||
Relation relation = sscan->rs_rd;
|
||||
Snapshot snapshot = sscan->rs_snapshot;
|
||||
ItemPointerData ctid;
|
||||
TransactionId priorXmax;
|
||||
|
||||
/* this is to avoid Assert failures on bad input */
|
||||
if (!ItemPointerIsValid(tid))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Since this can be called with user-supplied TID, don't trust the input
|
||||
* too much. (RelationGetNumberOfBlocks is an expensive check, so we
|
||||
* don't check t_ctid links again this way. Note that it would not do to
|
||||
* call it just once and save the result, either.)
|
||||
* table_get_latest_tid verified that the passed in tid is valid. Assume
|
||||
* that t_ctid links are valid however - there shouldn't be invalid ones
|
||||
* in the table.
|
||||
*/
|
||||
blk = ItemPointerGetBlockNumber(tid);
|
||||
if (blk >= RelationGetNumberOfBlocks(relation))
|
||||
elog(ERROR, "block number %u is out of range for relation \"%s\"",
|
||||
blk, RelationGetRelationName(relation));
|
||||
Assert(ItemPointerIsValid(tid));
|
||||
|
||||
/*
|
||||
* Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
|
||||
|
@ -204,6 +204,15 @@ heapam_fetch_row_version(Relation relation,
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
|
||||
{
|
||||
HeapScanDesc hscan = (HeapScanDesc) scan;
|
||||
|
||||
return ItemPointerIsValid(tid) &&
|
||||
ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
|
||||
}
|
||||
|
||||
static bool
|
||||
heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
|
||||
Snapshot snapshot)
|
||||
@ -2568,6 +2577,7 @@ static const TableAmRoutine heapam_methods = {
|
||||
|
||||
.tuple_fetch_row_version = heapam_fetch_row_version,
|
||||
.tuple_get_latest_tid = heap_get_latest_tid,
|
||||
.tuple_tid_valid = heapam_tuple_tid_valid,
|
||||
.tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
|
||||
.compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
|
||||
|
||||
|
@ -213,6 +213,33 @@ table_index_fetch_tuple_check(Relation rel,
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------
|
||||
* Functions for non-modifying operations on individual tuples
|
||||
* ------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
void
|
||||
table_get_latest_tid(TableScanDesc scan, ItemPointer tid)
|
||||
{
|
||||
Relation rel = scan->rs_rd;
|
||||
const TableAmRoutine *tableam = rel->rd_tableam;
|
||||
|
||||
/*
|
||||
* Since this can be called with user-supplied TID, don't trust the input
|
||||
* too much.
|
||||
*/
|
||||
if (!tableam->tuple_tid_valid(scan, tid))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("tid (%u, %u) is not valid for relation for relation \"%s\"",
|
||||
ItemPointerGetBlockNumberNoCheck(tid),
|
||||
ItemPointerGetOffsetNumberNoCheck(tid),
|
||||
RelationGetRelationName(rel))));
|
||||
|
||||
return tableam->tuple_get_latest_tid(scan, tid);
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Functions to make modifications a bit simpler.
|
||||
* ----------------------------------------------------------------------------
|
||||
|
@ -129,19 +129,23 @@ static void
|
||||
TidListEval(TidScanState *tidstate)
|
||||
{
|
||||
ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
|
||||
BlockNumber nblocks;
|
||||
TableScanDesc scan;
|
||||
ItemPointerData *tidList;
|
||||
int numAllocTids;
|
||||
int numTids;
|
||||
ListCell *l;
|
||||
|
||||
/*
|
||||
* We silently discard any TIDs that are out of range at the time of scan
|
||||
* start. (Since we hold at least AccessShareLock on the table, it won't
|
||||
* be possible for someone to truncate away the blocks we intend to
|
||||
* visit.)
|
||||
* Start scan on-demand - initializing a scan isn't free (e.g. heap stats
|
||||
* the size of the table), so it makes sense to delay that until needed -
|
||||
* the node might never get executed.
|
||||
*/
|
||||
nblocks = RelationGetNumberOfBlocks(tidstate->ss.ss_currentRelation);
|
||||
if (tidstate->ss.ss_currentScanDesc == NULL)
|
||||
tidstate->ss.ss_currentScanDesc =
|
||||
table_beginscan(tidstate->ss.ss_currentRelation,
|
||||
tidstate->ss.ps.state->es_snapshot,
|
||||
0, NULL);
|
||||
scan = tidstate->ss.ss_currentScanDesc;
|
||||
|
||||
/*
|
||||
* We initialize the array with enough slots for the case that all quals
|
||||
@ -165,19 +169,27 @@ TidListEval(TidScanState *tidstate)
|
||||
DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
|
||||
econtext,
|
||||
&isNull));
|
||||
if (!isNull &&
|
||||
ItemPointerIsValid(itemptr) &&
|
||||
ItemPointerGetBlockNumber(itemptr) < nblocks)
|
||||
if (isNull)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We silently discard any TIDs that the AM considers invalid
|
||||
* (E.g. for heap, they could be out of range at the time of scan
|
||||
* start. Since we hold at least AccessShareLock on the table, it
|
||||
* won't be possible for someone to truncate away the blocks we
|
||||
* intend to visit.).
|
||||
*/
|
||||
if (!table_tuple_tid_valid(scan, itemptr))
|
||||
continue;
|
||||
|
||||
if (numTids >= numAllocTids)
|
||||
{
|
||||
if (numTids >= numAllocTids)
|
||||
{
|
||||
numAllocTids *= 2;
|
||||
tidList = (ItemPointerData *)
|
||||
repalloc(tidList,
|
||||
numAllocTids * sizeof(ItemPointerData));
|
||||
}
|
||||
tidList[numTids++] = *itemptr;
|
||||
numAllocTids *= 2;
|
||||
tidList = (ItemPointerData *)
|
||||
repalloc(tidList,
|
||||
numAllocTids * sizeof(ItemPointerData));
|
||||
}
|
||||
tidList[numTids++] = *itemptr;
|
||||
}
|
||||
else if (tidexpr->exprstate && tidexpr->isarray)
|
||||
{
|
||||
@ -206,13 +218,15 @@ TidListEval(TidScanState *tidstate)
|
||||
}
|
||||
for (i = 0; i < ndatums; i++)
|
||||
{
|
||||
if (!ipnulls[i])
|
||||
{
|
||||
itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
|
||||
if (ItemPointerIsValid(itemptr) &&
|
||||
ItemPointerGetBlockNumber(itemptr) < nblocks)
|
||||
tidList[numTids++] = *itemptr;
|
||||
}
|
||||
if (ipnulls[i])
|
||||
continue;
|
||||
|
||||
itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
|
||||
|
||||
if (!table_tuple_tid_valid(scan, itemptr))
|
||||
continue;
|
||||
|
||||
tidList[numTids++] = *itemptr;
|
||||
}
|
||||
pfree(ipdatums);
|
||||
pfree(ipnulls);
|
||||
@ -306,6 +320,7 @@ TidNext(TidScanState *node)
|
||||
EState *estate;
|
||||
ScanDirection direction;
|
||||
Snapshot snapshot;
|
||||
TableScanDesc scan;
|
||||
Relation heapRelation;
|
||||
TupleTableSlot *slot;
|
||||
ItemPointerData *tidList;
|
||||
@ -327,6 +342,7 @@ TidNext(TidScanState *node)
|
||||
if (node->tss_TidList == NULL)
|
||||
TidListEval(node);
|
||||
|
||||
scan = node->ss.ss_currentScanDesc;
|
||||
tidList = node->tss_TidList;
|
||||
numTids = node->tss_NumTids;
|
||||
|
||||
@ -365,7 +381,7 @@ TidNext(TidScanState *node)
|
||||
* current according to our snapshot.
|
||||
*/
|
||||
if (node->tss_isCurrentOf)
|
||||
table_get_latest_tid(heapRelation, snapshot, &tid);
|
||||
table_get_latest_tid(scan, &tid);
|
||||
|
||||
if (table_fetch_row_version(heapRelation, &tid, snapshot, slot))
|
||||
return slot;
|
||||
@ -442,6 +458,10 @@ ExecReScanTidScan(TidScanState *node)
|
||||
node->tss_NumTids = 0;
|
||||
node->tss_TidPtr = -1;
|
||||
|
||||
/* not really necessary, but seems good form */
|
||||
if (node->ss.ss_currentScanDesc)
|
||||
table_rescan(node->ss.ss_currentScanDesc, NULL);
|
||||
|
||||
ExecScanReScan(&node->ss);
|
||||
}
|
||||
|
||||
@ -455,6 +475,9 @@ ExecReScanTidScan(TidScanState *node)
|
||||
void
|
||||
ExecEndTidScan(TidScanState *node)
|
||||
{
|
||||
if (node->ss.ss_currentScanDesc)
|
||||
table_endscan(node->ss.ss_currentScanDesc);
|
||||
|
||||
/*
|
||||
* Free the exprcontext
|
||||
*/
|
||||
|
@ -358,6 +358,7 @@ currtid_byreloid(PG_FUNCTION_ARGS)
|
||||
Relation rel;
|
||||
AclResult aclresult;
|
||||
Snapshot snapshot;
|
||||
TableScanDesc scan;
|
||||
|
||||
result = (ItemPointer) palloc(sizeof(ItemPointerData));
|
||||
if (!reloid)
|
||||
@ -380,7 +381,9 @@ currtid_byreloid(PG_FUNCTION_ARGS)
|
||||
ItemPointerCopy(tid, result);
|
||||
|
||||
snapshot = RegisterSnapshot(GetLatestSnapshot());
|
||||
table_get_latest_tid(rel, snapshot, result);
|
||||
scan = table_beginscan(rel, snapshot, 0, NULL);
|
||||
table_get_latest_tid(scan, result);
|
||||
table_endscan(scan);
|
||||
UnregisterSnapshot(snapshot);
|
||||
|
||||
table_close(rel, AccessShareLock);
|
||||
@ -398,6 +401,7 @@ currtid_byrelname(PG_FUNCTION_ARGS)
|
||||
Relation rel;
|
||||
AclResult aclresult;
|
||||
Snapshot snapshot;
|
||||
TableScanDesc scan;
|
||||
|
||||
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
|
||||
rel = table_openrv(relrv, AccessShareLock);
|
||||
@ -415,7 +419,9 @@ currtid_byrelname(PG_FUNCTION_ARGS)
|
||||
ItemPointerCopy(tid, result);
|
||||
|
||||
snapshot = RegisterSnapshot(GetLatestSnapshot());
|
||||
table_get_latest_tid(rel, snapshot, result);
|
||||
scan = table_beginscan(rel, snapshot, 0, NULL);
|
||||
table_get_latest_tid(scan, result);
|
||||
table_endscan(scan);
|
||||
UnregisterSnapshot(snapshot);
|
||||
|
||||
table_close(rel, AccessShareLock);
|
||||
|
@ -134,8 +134,7 @@ extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
|
||||
Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
|
||||
bool *all_dead, bool first_call);
|
||||
|
||||
extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
|
||||
ItemPointer tid);
|
||||
extern void heap_get_latest_tid(TableScanDesc scan, ItemPointer tid);
|
||||
extern void setLastTid(const ItemPointer tid);
|
||||
|
||||
extern BulkInsertState GetBulkInsertState(void);
|
||||
|
@ -308,12 +308,17 @@ typedef struct TableAmRoutine
|
||||
Snapshot snapshot,
|
||||
TupleTableSlot *slot);
|
||||
|
||||
/*
|
||||
* Is tid valid for a scan of this relation.
|
||||
*/
|
||||
bool (*tuple_tid_valid) (TableScanDesc scan,
|
||||
ItemPointer tid);
|
||||
|
||||
/*
|
||||
* Return the latest version of the tuple at `tid`, by updating `tid` to
|
||||
* point at the newest version.
|
||||
*/
|
||||
void (*tuple_get_latest_tid) (Relation rel,
|
||||
Snapshot snapshot,
|
||||
void (*tuple_get_latest_tid) (TableScanDesc scan,
|
||||
ItemPointer tid);
|
||||
|
||||
/*
|
||||
@ -548,10 +553,10 @@ typedef struct TableAmRoutine
|
||||
/*
|
||||
* See table_relation_size().
|
||||
*
|
||||
* Note that currently a few callers use the MAIN_FORKNUM size to vet the
|
||||
* validity of tids (e.g. nodeTidscans.c), and others use it to figure out
|
||||
* the range of potentially interesting blocks (brin, analyze). The
|
||||
* abstraction around this will need to be improved in the near future.
|
||||
* Note that currently a few callers use the MAIN_FORKNUM size to figure
|
||||
* out the range of potentially interesting blocks (brin, analyze). It's
|
||||
* probable that we'll need to revise the interface for those at some
|
||||
* point.
|
||||
*/
|
||||
uint64 (*relation_size) (Relation rel, ForkNumber forkNumber);
|
||||
|
||||
@ -985,15 +990,25 @@ table_fetch_row_version(Relation rel,
|
||||
return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify that `tid` is a potentially valid tuple identifier. That doesn't
|
||||
* mean that the pointed to row needs to exist or be visible, but that
|
||||
* attempting to fetch the row (e.g. with table_get_latest_tid() or
|
||||
* table_fetch_row_version()) should not error out if called with that tid.
|
||||
*
|
||||
* `scan` needs to have been started via table_beginscan().
|
||||
*/
|
||||
static inline bool
|
||||
table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
|
||||
{
|
||||
return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the latest version of the tuple at `tid`, by updating `tid` to
|
||||
* point at the newest version.
|
||||
*/
|
||||
static inline void
|
||||
table_get_latest_tid(Relation rel, Snapshot snapshot, ItemPointer tid)
|
||||
{
|
||||
rel->rd_tableam->tuple_get_latest_tid(rel, snapshot, tid);
|
||||
}
|
||||
extern void table_get_latest_tid(TableScanDesc scan, ItemPointer tid);
|
||||
|
||||
/*
|
||||
* Return true iff tuple in slot satisfies the snapshot.
|
||||
|
Loading…
Reference in New Issue
Block a user