mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-03-13 19:57:53 +08:00
Convert hash join code to use MinimalTuple format in tuple hash table
and batch files. Should reduce memory and I/O demands for such joins.
This commit is contained in:
parent
665c5e861a
commit
69d0a15e2a
@ -15,7 +15,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.95 2006/06/27 02:51:39 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.96 2006/06/27 21:31:20 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -718,6 +718,55 @@ ExecFetchSlotTuple(TupleTableSlot *slot)
|
||||
return ExecMaterializeSlot(slot);
|
||||
}
|
||||
|
||||
/* --------------------------------
|
||||
* ExecFetchSlotMinimalTuple
|
||||
* Fetch the slot's contents as a minimal physical tuple.
|
||||
*
|
||||
* If the slot already holds a minimal tuple (tts_mintuple is set), that
|
||||
* tuple is returned as-is. Otherwise the slot holds a virtual or a
|
||||
* regular tuple: a minimal-format copy is built in the slot's memory
|
||||
* context (tts_mcxt) and stored back into the slot as its new value.
|
||||
*
|
||||
* The slot must be non-NULL and non-empty; both are only checked by
|
||||
* assertions.
|
||||
*
|
||||
* In every case the slot retains ownership of the returned tuple, so
|
||||
* the result must be treated as read-only by the caller.
|
||||
* --------------------------------
|
||||
*/
|
||||
MinimalTuple
|
||||
ExecFetchSlotMinimalTuple(TupleTableSlot *slot)
|
||||
{
|
||||
MinimalTuple newTuple;
|
||||
|
||||
MemoryContext oldContext;
|
||||
|
||||
|
||||
/*
|
||||
* sanity checks: the caller must pass a real slot that currently
|
||||
* holds a tuple
|
||||
*/
|
||||
Assert(slot != NULL);
|
||||
Assert(!slot->tts_isempty);
|
||||
|
||||
/*
|
||||
* If we have a minimal physical tuple then just return it; no copy
|
||||
* is made and the slot is left untouched.
|
||||
*/
|
||||
if (slot->tts_mintuple)
|
||||
return slot->tts_mintuple;
|
||||
|
||||
/*
|
||||
* Otherwise, build a minimal tuple, and then store it as the new slot
|
||||
* value. (Note: tts_nvalid will be reset to zero here. There are cases
|
||||
* in which this could be optimized but it's probably not worth worrying
|
||||
* about.)
|
||||
*
|
||||
* We may be called in a context that is shorter-lived than the tuple
|
||||
* slot, but we have to ensure that the materialized tuple will survive
|
||||
* anyway. Hence the copy is allocated while slot->tts_mcxt is the
|
||||
* current memory context, and the caller's context is restored
|
||||
* before the tuple is stored.
|
||||
*
|
||||
* The copy is then stored with "true" as the last argument of
|
||||
* ExecStoreMinimalTuple (presumably the should-free flag; confirm
|
||||
* against its definition), which matches the header's statement
|
||||
* that the slot retains ownership of the tuple.
|
||||
*/
|
||||
oldContext = MemoryContextSwitchTo(slot->tts_mcxt);
|
||||
newTuple = ExecCopySlotMinimalTuple(slot);
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
|
||||
ExecStoreMinimalTuple(newTuple, slot, true);
|
||||
|
||||
Assert(slot->tts_mintuple);
|
||||
return slot->tts_mintuple;
|
||||
}
|
||||
|
||||
/* --------------------------------
|
||||
* ExecMaterializeSlot
|
||||
* Force a slot into the "materialized" state.
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.103 2006/05/30 14:01:58 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.104 2006/06/27 21:31:20 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -92,7 +92,7 @@ MultiExecHash(HashState *node)
|
||||
/* We have to compute the hash value */
|
||||
econtext->ecxt_innertuple = slot;
|
||||
hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys);
|
||||
ExecHashTableInsert(hashtable, ExecFetchSlotTuple(slot), hashvalue);
|
||||
ExecHashTableInsert(hashtable, slot, hashvalue);
|
||||
}
|
||||
|
||||
/* must provide our own instrumentation support */
|
||||
@ -358,8 +358,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
|
||||
* does not allow for any palloc overhead. The manipulations of spaceUsed
|
||||
* don't count palloc overhead either.
|
||||
*/
|
||||
tupsize = MAXALIGN(sizeof(HashJoinTupleData)) +
|
||||
MAXALIGN(sizeof(HeapTupleHeaderData)) +
|
||||
tupsize = HJTUPLE_OVERHEAD +
|
||||
MAXALIGN(sizeof(MinimalTupleData)) +
|
||||
MAXALIGN(tupwidth);
|
||||
inner_rel_bytes = ntuples * tupsize;
|
||||
|
||||
@ -548,7 +548,8 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
|
||||
{
|
||||
/* dump it out */
|
||||
Assert(batchno > curbatch);
|
||||
ExecHashJoinSaveTuple(&tuple->htup, tuple->hashvalue,
|
||||
ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(tuple),
|
||||
tuple->hashvalue,
|
||||
&hashtable->innerBatchFile[batchno]);
|
||||
/* and remove from hash table */
|
||||
if (prevtuple)
|
||||
@ -557,7 +558,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
|
||||
hashtable->buckets[i] = nexttuple;
|
||||
/* prevtuple doesn't change */
|
||||
hashtable->spaceUsed -=
|
||||
MAXALIGN(sizeof(HashJoinTupleData)) + tuple->htup.t_len;
|
||||
HJTUPLE_OVERHEAD + HJTUPLE_MINTUPLE(tuple)->t_len;
|
||||
pfree(tuple);
|
||||
nfreed++;
|
||||
}
|
||||
@ -592,12 +593,19 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
|
||||
* ExecHashTableInsert
|
||||
* insert a tuple into the hash table depending on the hash value
|
||||
* it may just go to a temp file for later batches
|
||||
*
|
||||
* Note: the passed TupleTableSlot may contain a regular, minimal, or virtual
|
||||
* tuple; the minimal case in particular is certain to happen while reloading
|
||||
* tuples from batch files. We could save some cycles in the regular-tuple
|
||||
* case by not forcing the slot contents into minimal form; not clear if it's
|
||||
* worth the messiness required.
|
||||
*/
|
||||
void
|
||||
ExecHashTableInsert(HashJoinTable hashtable,
|
||||
HeapTuple tuple,
|
||||
TupleTableSlot *slot,
|
||||
uint32 hashvalue)
|
||||
{
|
||||
MinimalTuple tuple = ExecFetchSlotMinimalTuple(slot);
|
||||
int bucketno;
|
||||
int batchno;
|
||||
|
||||
@ -615,18 +623,11 @@ ExecHashTableInsert(HashJoinTable hashtable,
|
||||
HashJoinTuple hashTuple;
|
||||
int hashTupleSize;
|
||||
|
||||
hashTupleSize = MAXALIGN(sizeof(HashJoinTupleData)) + tuple->t_len;
|
||||
hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
|
||||
hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
|
||||
hashTupleSize);
|
||||
hashTuple->hashvalue = hashvalue;
|
||||
memcpy((char *) &hashTuple->htup,
|
||||
(char *) tuple,
|
||||
sizeof(hashTuple->htup));
|
||||
hashTuple->htup.t_data = (HeapTupleHeader)
|
||||
(((char *) hashTuple) + MAXALIGN(sizeof(HashJoinTupleData)));
|
||||
memcpy((char *) hashTuple->htup.t_data,
|
||||
(char *) tuple->t_data,
|
||||
tuple->t_len);
|
||||
memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
|
||||
hashTuple->next = hashtable->buckets[bucketno];
|
||||
hashtable->buckets[bucketno] = hashTuple;
|
||||
hashtable->spaceUsed += hashTupleSize;
|
||||
@ -639,7 +640,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
|
||||
* put the tuple into a temp file for later batches
|
||||
*/
|
||||
Assert(batchno > hashtable->curbatch);
|
||||
ExecHashJoinSaveTuple(tuple, hashvalue,
|
||||
ExecHashJoinSaveTuple(tuple,
|
||||
hashvalue,
|
||||
&hashtable->innerBatchFile[batchno]);
|
||||
}
|
||||
}
|
||||
@ -749,7 +751,7 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
|
||||
*
|
||||
* The current outer tuple must be stored in econtext->ecxt_outertuple.
|
||||
*/
|
||||
HeapTuple
|
||||
HashJoinTuple
|
||||
ExecScanHashBucket(HashJoinState *hjstate,
|
||||
ExprContext *econtext)
|
||||
{
|
||||
@ -771,14 +773,12 @@ ExecScanHashBucket(HashJoinState *hjstate,
|
||||
{
|
||||
if (hashTuple->hashvalue == hashvalue)
|
||||
{
|
||||
HeapTuple heapTuple = &hashTuple->htup;
|
||||
TupleTableSlot *inntuple;
|
||||
|
||||
/* insert hashtable's tuple into exec slot so ExecQual sees it */
|
||||
inntuple = ExecStoreTuple(heapTuple,
|
||||
hjstate->hj_HashTupleSlot,
|
||||
InvalidBuffer,
|
||||
false); /* do not pfree */
|
||||
inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
|
||||
hjstate->hj_HashTupleSlot,
|
||||
false); /* do not pfree */
|
||||
econtext->ecxt_innertuple = inntuple;
|
||||
|
||||
/* reset temp memory each time to avoid leaks from qual expr */
|
||||
@ -787,7 +787,7 @@ ExecScanHashBucket(HashJoinState *hjstate,
|
||||
if (ExecQual(hjclauses, econtext, false))
|
||||
{
|
||||
hjstate->hj_CurTuple = hashTuple;
|
||||
return heapTuple;
|
||||
return hashTuple;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.82 2006/06/16 18:42:22 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.83 2006/06/27 21:31:20 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -54,7 +54,7 @@ ExecHashJoin(HashJoinState *node)
|
||||
ExprContext *econtext;
|
||||
ExprDoneCond isDone;
|
||||
HashJoinTable hashtable;
|
||||
HeapTuple curtuple;
|
||||
HashJoinTuple curtuple;
|
||||
TupleTableSlot *outerTupleSlot;
|
||||
uint32 hashvalue;
|
||||
int batchno;
|
||||
@ -224,7 +224,7 @@ ExecHashJoin(HashJoinState *node)
|
||||
* in the corresponding outer-batch file.
|
||||
*/
|
||||
Assert(batchno > hashtable->curbatch);
|
||||
ExecHashJoinSaveTuple(ExecFetchSlotTuple(outerTupleSlot),
|
||||
ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
|
||||
hashvalue,
|
||||
&hashtable->outerBatchFile[batchno]);
|
||||
node->hj_NeedNewOuter = true;
|
||||
@ -244,10 +244,9 @@ ExecHashJoin(HashJoinState *node)
|
||||
/*
|
||||
* we've got a match, but still need to test non-hashed quals
|
||||
*/
|
||||
inntuple = ExecStoreTuple(curtuple,
|
||||
node->hj_HashTupleSlot,
|
||||
InvalidBuffer,
|
||||
false); /* don't pfree this tuple */
|
||||
inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple),
|
||||
node->hj_HashTupleSlot,
|
||||
false); /* don't pfree */
|
||||
econtext->ecxt_innertuple = inntuple;
|
||||
|
||||
/* reset temp memory each time to avoid leaks from qual expr */
|
||||
@ -706,9 +705,7 @@ start_over:
|
||||
* NOTE: some tuples may be sent to future batches. Also, it is
|
||||
* possible for hashtable->nbatch to be increased here!
|
||||
*/
|
||||
ExecHashTableInsert(hashtable,
|
||||
ExecFetchSlotTuple(slot),
|
||||
hashvalue);
|
||||
ExecHashTableInsert(hashtable, slot, hashvalue);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -741,15 +738,14 @@ start_over:
|
||||
* save a tuple to a batch file.
|
||||
*
|
||||
* The data recorded in the file for each tuple is its hash value,
|
||||
* then an image of its HeapTupleData (with meaningless t_data pointer)
|
||||
* followed by the HeapTupleHeader and tuple data.
|
||||
* then the tuple in MinimalTuple format.
|
||||
*
|
||||
* Note: it is important always to call this in the regular executor
|
||||
* context, not in a shorter-lived context; else the temp file buffers
|
||||
* will get messed up.
|
||||
*/
|
||||
void
|
||||
ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
|
||||
ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
|
||||
BufFile **fileptr)
|
||||
{
|
||||
BufFile *file = *fileptr;
|
||||
@ -768,14 +764,8 @@ ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not write to hash-join temporary file: %m")));
|
||||
|
||||
written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData));
|
||||
if (written != sizeof(HeapTupleData))
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not write to hash-join temporary file: %m")));
|
||||
|
||||
written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len);
|
||||
if (written != (size_t) heapTuple->t_len)
|
||||
written = BufFileWrite(file, (void *) tuple, tuple->t_len);
|
||||
if (written != tuple->t_len)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not write to hash-join temporary file: %m")));
|
||||
@ -794,32 +784,36 @@ ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
|
||||
uint32 *hashvalue,
|
||||
TupleTableSlot *tupleSlot)
|
||||
{
|
||||
HeapTupleData htup;
|
||||
uint32 header[2];
|
||||
size_t nread;
|
||||
HeapTuple heapTuple;
|
||||
MinimalTuple tuple;
|
||||
|
||||
nread = BufFileRead(file, (void *) hashvalue, sizeof(uint32));
|
||||
if (nread == 0)
|
||||
return NULL; /* end of file */
|
||||
if (nread != sizeof(uint32))
|
||||
/*
|
||||
* Since both the hash value and the MinimalTuple length word are
|
||||
* uint32, we can read them both in one BufFileRead() call without
|
||||
* any type cheating.
|
||||
*/
|
||||
nread = BufFileRead(file, (void *) header, sizeof(header));
|
||||
if (nread == 0) /* end of file */
|
||||
{
|
||||
ExecClearTuple(tupleSlot);
|
||||
return NULL;
|
||||
}
|
||||
if (nread != sizeof(header))
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not read from hash-join temporary file: %m")));
|
||||
nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData));
|
||||
if (nread != sizeof(HeapTupleData))
|
||||
*hashvalue = header[0];
|
||||
tuple = (MinimalTuple) palloc(header[1]);
|
||||
tuple->t_len = header[1];
|
||||
nread = BufFileRead(file,
|
||||
(void *) ((char *) tuple + sizeof(uint32)),
|
||||
header[1] - sizeof(uint32));
|
||||
if (nread != header[1] - sizeof(uint32))
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not read from hash-join temporary file: %m")));
|
||||
heapTuple = palloc(HEAPTUPLESIZE + htup.t_len);
|
||||
memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData));
|
||||
heapTuple->t_data = (HeapTupleHeader)
|
||||
((char *) heapTuple + HEAPTUPLESIZE);
|
||||
nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len);
|
||||
if (nread != (size_t) htup.t_len)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not read from hash-join temporary file: %m")));
|
||||
return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
|
||||
return ExecStoreMinimalTuple(tuple, tupleSlot, true);
|
||||
}
|
||||
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.38 2006/03/05 15:58:56 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.39 2006/06/27 21:31:20 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -65,9 +65,14 @@ typedef struct HashJoinTupleData
|
||||
{
|
||||
struct HashJoinTupleData *next; /* link to next tuple in same bucket */
|
||||
uint32 hashvalue; /* tuple's hash code */
|
||||
HeapTupleData htup; /* tuple header */
|
||||
/* Tuple data, in MinimalTuple format, follows on a MAXALIGN boundary */
|
||||
} HashJoinTupleData;
|
||||
|
||||
#define HJTUPLE_OVERHEAD MAXALIGN(sizeof(HashJoinTupleData))
|
||||
#define HJTUPLE_MINTUPLE(hjtup) \
|
||||
((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))
|
||||
|
||||
|
||||
typedef struct HashJoinTableData
|
||||
{
|
||||
int nbuckets; /* # buckets in the in-memory hash table */
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.40 2006/03/05 15:58:56 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.41 2006/06/27 21:31:20 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -26,7 +26,7 @@ extern void ExecReScanHash(HashState *node, ExprContext *exprCtxt);
|
||||
extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators);
|
||||
extern void ExecHashTableDestroy(HashJoinTable hashtable);
|
||||
extern void ExecHashTableInsert(HashJoinTable hashtable,
|
||||
HeapTuple tuple,
|
||||
TupleTableSlot *slot,
|
||||
uint32 hashvalue);
|
||||
extern uint32 ExecHashGetHashValue(HashJoinTable hashtable,
|
||||
ExprContext *econtext,
|
||||
@ -35,7 +35,7 @@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
|
||||
uint32 hashvalue,
|
||||
int *bucketno,
|
||||
int *batchno);
|
||||
extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate,
|
||||
extern HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate,
|
||||
ExprContext *econtext);
|
||||
extern void ExecHashTableReset(HashJoinTable hashtable);
|
||||
extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/executor/nodeHashjoin.h,v 1.32 2006/03/05 15:58:56 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/executor/nodeHashjoin.h,v 1.33 2006/06/27 21:31:20 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -23,7 +23,7 @@ extern TupleTableSlot *ExecHashJoin(HashJoinState *node);
|
||||
extern void ExecEndHashJoin(HashJoinState *node);
|
||||
extern void ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt);
|
||||
|
||||
extern void ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
|
||||
extern void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
|
||||
BufFile **fileptr);
|
||||
|
||||
#endif /* NODEHASHJOIN_H */
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/executor/tuptable.h,v 1.32 2006/06/27 02:51:40 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/include/executor/tuptable.h,v 1.33 2006/06/27 21:31:20 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -159,6 +159,7 @@ extern TupleTableSlot *ExecStoreAllNullTuple(TupleTableSlot *slot);
|
||||
extern HeapTuple ExecCopySlotTuple(TupleTableSlot *slot);
|
||||
extern MinimalTuple ExecCopySlotMinimalTuple(TupleTableSlot *slot);
|
||||
extern HeapTuple ExecFetchSlotTuple(TupleTableSlot *slot);
|
||||
extern MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot);
|
||||
extern HeapTuple ExecMaterializeSlot(TupleTableSlot *slot);
|
||||
extern TupleTableSlot *ExecCopySlot(TupleTableSlot *dstslot,
|
||||
TupleTableSlot *srcslot);
|
||||
|
Loading…
x
Reference in New Issue
Block a user