From 9fcbe2af11fb966b30117d8ac3c2971d1be14207 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 22 Sep 2004 19:13:52 +0000 Subject: [PATCH] Arrange for hash join to skip scanning the outer relation if it detects that the inner one is completely empty. Per recent discussion. Also some cosmetic cleanups in nearby code. --- src/backend/executor/nodeHash.c | 12 +++++++----- src/backend/executor/nodeHashjoin.c | 19 ++++++++++++++----- src/include/executor/hashjoin.h | 10 ++++++---- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index c4236f5fcc..5beb6359a8 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.86 2004/08/29 04:12:31 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.87 2004/09/22 19:13:49 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,8 +32,8 @@ /* ---------------------------------------------------------------- * ExecHash * - * build hash table for hashjoin, all do partitioning if more - * than one batches are required. + * build hash table for hashjoin, doing partitioning if more + * than one batch is required. * ---------------------------------------------------------------- */ TupleTableSlot * @@ -81,6 +81,7 @@ ExecHash(HashState *node) slot = ExecProcNode(outerNode); if (TupIsNull(slot)) break; + hashtable->hashNonEmpty = true; econtext->ecxt_innertuple = slot; ExecHashTableInsert(hashtable, econtext, hashkeys); ExecClearTuple(slot); @@ -189,7 +190,7 @@ ExecEndHash(HashState *node) /* ---------------------------------------------------------------- * ExecHashTableCreate * - * create a hashtable in shared memory for hashjoin. + * create an empty hashtable data structure for hashjoin. * ---------------------------------------------------------------- */ HashJoinTable @@ -226,12 +227,13 @@ ExecHashTableCreate(Hash *node, List *hashOperators) * The hashtable control block is just palloc'd from the executor's * per-query memory context. */ - hashtable = (HashJoinTable) palloc(sizeof(HashTableData)); + hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData)); hashtable->nbuckets = nbuckets; hashtable->totalbuckets = totalbuckets; hashtable->buckets = NULL; hashtable->nbatch = nbatch; hashtable->curbatch = 0; + hashtable->hashNonEmpty = false; hashtable->innerBatchFile = NULL; hashtable->outerBatchFile = NULL; hashtable->innerBatchSize = NULL; diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 53215be6e9..294f481cdf 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.65 2004/09/17 18:28:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.66 2004/09/22 19:13:49 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -127,6 +127,17 @@ ExecHashJoin(HashJoinState *node) hashNode->hashtable = hashtable; (void) ExecProcNode((PlanState *) hashNode); + /* + * If the inner relation is completely empty, and we're not doing + * an outer join, we can quit without scanning the outer relation. + */ + if (!hashtable->hashNonEmpty && node->js.jointype != JOIN_LEFT) + { + ExecHashTableDestroy(hashtable); + node->hj_HashTable = NULL; + return NULL; + } + /* * Open temp files for outer batches, if needed. Note that file * buffers are palloc'd in regular executor context. @@ -138,10 +149,8 @@ ExecHashJoin(HashJoinState *node) } /* - * Now get an outer tuple and probe into the hash table for matches + * run the hash join process */ - outerTupleSlot = node->js.ps.ps_OuterTupleSlot; - for (;;) { /* @@ -226,7 +235,7 @@ ExecHashJoin(HashJoinState *node) * Only the joinquals determine MatchedOuter status, but all * quals must pass to actually return the tuple. */ - if (ExecQual(joinqual, econtext, false)) + if (joinqual == NIL || ExecQual(joinqual, econtext, false)) { node->hj_MatchedOuter = true; diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index 5dd6c5c8fe..8a2eba8e0b 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.32 2004/08/29 04:13:06 momjian Exp $ + * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.33 2004/09/22 19:13:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -47,7 +47,7 @@ typedef struct HashJoinTupleData typedef HashJoinTupleData *HashJoinTuple; -typedef struct HashTableData +typedef struct HashJoinTableData { int nbuckets; /* buckets in use during this batch */ int totalbuckets; /* total number of (virtual) buckets */ @@ -57,6 +57,8 @@ typedef struct HashTableData int nbatch; /* number of batches; 0 means 1-pass join */ int curbatch; /* current batch #, or 0 during 1st pass */ + bool hashNonEmpty; /* did inner plan produce any rows? */ + /* * all these arrays are allocated for the life of the hash join, but * only if nbatch > 0: @@ -90,8 +92,8 @@ typedef struct HashTableData MemoryContext hashCxt; /* context for whole-hash-join storage */ MemoryContext batchCxt; /* context for this-batch-only storage */ -} HashTableData; +} HashJoinTableData; -typedef HashTableData *HashJoinTable; +typedef HashJoinTableData *HashJoinTable; #endif /* HASHJOIN_H */