From 2103b7baa26fe25cc2b5bee802028caeabb28113 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 6 Nov 2002 22:31:24 +0000 Subject: [PATCH] Phase 2 of hashed-aggregation project. nodeAgg.c now knows how to do hashed aggregation, but there's not yet planner support for it. --- src/backend/executor/nodeAgg.c | 784 +++++++++++++++++------- src/backend/executor/nodeGroup.c | 6 +- src/backend/executor/nodeHash.c | 23 +- src/backend/nodes/copyfuncs.c | 3 +- src/backend/nodes/outfuncs.c | 6 +- src/backend/optimizer/plan/createplan.c | 3 +- src/backend/optimizer/plan/planner.c | 63 +- src/backend/optimizer/util/clauses.c | 29 +- src/include/executor/nodeHash.h | 3 +- src/include/nodes/execnodes.h | 36 +- src/include/nodes/plannodes.h | 3 +- src/include/optimizer/clauses.h | 3 +- 12 files changed, 696 insertions(+), 266 deletions(-) diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 7714a68090..824e029927 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -29,24 +29,23 @@ * of course). A non-strict finalfunc can make its own choice of * what to return for a NULL ending transvalue. * - * When the transvalue datatype is pass-by-reference, we have to be - * careful to ensure that the values survive across tuple cycles yet - * are not allowed to accumulate until end of query. We do this by - * "ping-ponging" between two memory contexts; successive calls to the - * transfunc are executed in alternate contexts, passing the previous - * transvalue that is in the other context. At the beginning of each - * tuple cycle we can reset the current output context to avoid memory - * usage growth. Note: we must use MemoryContextContains() to check - * whether the transfunc has perhaps handed us back one of its input - * values rather than a freshly palloc'd value; if so, we copy the value - * to the context we want it in. + * We compute aggregate input expressions and run the transition functions + * in a temporary econtext (aggstate->tmpcontext). This is reset at + * least once per input tuple, so when the transvalue datatype is + * pass-by-reference, we have to be careful to copy it into a longer-lived + * memory context, and free the prior value to avoid memory leakage. + * We store transvalues in the memory context aggstate->aggcontext, + * which is also used for the hashtable structures in AGG_HASHED mode. + * The node's regular econtext (aggstate->csstate.cstate.cs_ExprContext) + * is used to run finalize functions and compute the output tuple; + * this context can be reset once per output tuple. * * * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.91 2002/11/06 00:00:43 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.92 2002/11/06 22:31:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -59,6 +58,7 @@ #include "executor/executor.h" #include "executor/nodeAgg.h" #include "executor/nodeGroup.h" +#include "executor/nodeHash.h" #include "miscadmin.h" #include "optimizer/clauses.h" #include "parser/parse_coerce.h" @@ -140,8 +140,27 @@ typedef struct AggStatePerAggData */ Tuplesortstate *sortstate; /* sort object, if a DISTINCT agg */ +} AggStatePerAggData; - Datum transValue; +/* + * AggStatePerGroupData - per-aggregate-per-group working state + * + * These values are working state that is initialized at the start of + * an input tuple group and updated for each input tuple. + * + * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these + * structs (pointed to by aggstate->pergroup); we re-use the array for + * each input group, if it's AGG_SORTED mode. In AGG_HASHED mode, the + * hash table contains an array of these structs for each tuple group. + * + * Logically, the sortstate field belongs in this struct, but we do not + * keep it here for space reasons: we don't support DISTINCT aggregates + * in AGG_HASHED mode, so there's no reason to use up a pointer field + * in every entry of the hashtable. + */ +typedef struct AggStatePerGroupData +{ + Datum transValue; /* current transition value */ bool transValueIsNull; bool noTransValue; /* true if transValue not set yet */ @@ -154,97 +173,143 @@ typedef struct AggStatePerAggData * later input value. Only the first non-NULL input will be * auto-substituted. */ -} AggStatePerAggData; +} AggStatePerGroupData; + +/* + * To implement hashed aggregation, we need a hashtable that stores a + * representative tuple and an array of AggStatePerGroup structs for each + * distinct set of GROUP BY column values. We compute the hash key from + * the GROUP BY columns. + */ +typedef struct AggHashEntryData +{ + AggHashEntry next; /* next entry in same hash bucket */ + uint32 hashkey; /* exact hash key of this entry */ + HeapTuple firstTuple; /* copy of first tuple in this group */ + /* per-aggregate transition status array - must be last! */ + AggStatePerGroupData pergroup[1]; /* VARIABLE LENGTH ARRAY */ +} AggHashEntryData; /* VARIABLE LENGTH STRUCT */ + +typedef struct AggHashTableData +{ + int nbuckets; /* number of buckets in hash table */ + AggHashEntry buckets[1]; /* VARIABLE LENGTH ARRAY */ +} AggHashTableData; /* VARIABLE LENGTH STRUCT */ -static void initialize_aggregate(AggStatePerAgg peraggstate); -static void advance_transition_function(AggStatePerAgg peraggstate, - Datum newVal, bool isNull); -static void advance_aggregates(AggState *aggstate, ExprContext *econtext); +static void initialize_aggregates(AggState *aggstate, + AggStatePerAgg peragg, + AggStatePerGroup pergroup); +static void advance_transition_function(AggState *aggstate, + AggStatePerAgg peraggstate, + AggStatePerGroup pergroupstate, + Datum newVal, bool isNull); +static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup); static void process_sorted_aggregate(AggState *aggstate, - AggStatePerAgg peraggstate); -static void finalize_aggregate(AggStatePerAgg peraggstate, - Datum *resultVal, bool *resultIsNull); + AggStatePerAgg peraggstate, + AggStatePerGroup pergroupstate); +static void finalize_aggregate(AggState *aggstate, + AggStatePerAgg peraggstate, + AggStatePerGroup pergroupstate, + Datum *resultVal, bool *resultIsNull); +static void build_hash_table(Agg *node); +static AggHashEntry lookup_hash_entry(Agg *node, TupleTableSlot *slot); +static TupleTableSlot *agg_retrieve_direct(Agg *node); +static void agg_fill_hash_table(Agg *node); +static TupleTableSlot *agg_retrieve_hash_table(Agg *node); static Datum GetAggInitVal(Datum textInitVal, Oid transtype); /* - * Initialize one aggregate for a new set of input values. + * Initialize all aggregates for a new group of input values. * * When called, CurrentMemoryContext should be the per-query context. */ static void -initialize_aggregate(AggStatePerAgg peraggstate) +initialize_aggregates(AggState *aggstate, + AggStatePerAgg peragg, + AggStatePerGroup pergroup) { - Aggref *aggref = peraggstate->aggref; + int aggno; - /* - * Start a fresh sort operation for each DISTINCT aggregate. - */ - if (aggref->aggdistinct) + for (aggno = 0; aggno < aggstate->numaggs; aggno++) { + AggStatePerAgg peraggstate = &peragg[aggno]; + AggStatePerGroup pergroupstate = &pergroup[aggno]; + Aggref *aggref = peraggstate->aggref; + /* - * In case of rescan, maybe there could be an uncompleted sort - * operation? Clean it up if so. + * Start a fresh sort operation for each DISTINCT aggregate. */ - if (peraggstate->sortstate) - tuplesort_end(peraggstate->sortstate); + if (aggref->aggdistinct) + { + /* + * In case of rescan, maybe there could be an uncompleted sort + * operation? Clean it up if so. + */ + if (peraggstate->sortstate) + tuplesort_end(peraggstate->sortstate); - peraggstate->sortstate = - tuplesort_begin_datum(peraggstate->inputType, - peraggstate->sortOperator, - false); + peraggstate->sortstate = + tuplesort_begin_datum(peraggstate->inputType, + peraggstate->sortOperator, + false); + } + + /* + * (Re)set transValue to the initial value. + * + * Note that when the initial value is pass-by-ref, we must copy it + * (into the aggcontext) since we will pfree the transValue later. + */ + if (peraggstate->initValueIsNull) + pergroupstate->transValue = peraggstate->initValue; + else + { + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo(aggstate->aggcontext); + pergroupstate->transValue = datumCopy(peraggstate->initValue, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + MemoryContextSwitchTo(oldContext); + } + pergroupstate->transValueIsNull = peraggstate->initValueIsNull; + + /* + * If the initial value for the transition state doesn't exist in the + * pg_aggregate table then we will let the first non-NULL value + * returned from the outer procNode become the initial value. (This is + * useful for aggregates like max() and min().) The noTransValue flag + * signals that we still need to do this. + */ + pergroupstate->noTransValue = peraggstate->initValueIsNull; } - - /* - * (Re)set transValue to the initial value. - * - * Note that when the initial value is pass-by-ref, we just reuse it - * without copying for each group. Hence, transition function had - * better not scribble on its input, or it will fail for GROUP BY! - */ - peraggstate->transValue = peraggstate->initValue; - peraggstate->transValueIsNull = peraggstate->initValueIsNull; - - /* - * If the initial value for the transition state doesn't exist in the - * pg_aggregate table then we will let the first non-NULL value - * returned from the outer procNode become the initial value. (This is - * useful for aggregates like max() and min().) The noTransValue flag - * signals that we still need to do this. - */ - peraggstate->noTransValue = peraggstate->initValueIsNull; } /* * Given a new input value, advance the transition function of an aggregate. * - * When called, CurrentMemoryContext should be the context we want the - * transition function result to be delivered into on this cycle. + * It doesn't matter which memory context this is called in. */ static void -advance_transition_function(AggStatePerAgg peraggstate, +advance_transition_function(AggState *aggstate, + AggStatePerAgg peraggstate, + AggStatePerGroup pergroupstate, Datum newVal, bool isNull) { FunctionCallInfoData fcinfo; + MemoryContext oldContext; if (peraggstate->transfn.fn_strict) { + /* + * For a strict transfn, nothing happens at a NULL input + * tuple; we just keep the prior transValue. + */ if (isNull) - { - /* - * For a strict transfn, nothing happens at a NULL input - * tuple; we just keep the prior transValue. However, if the - * transtype is pass-by-ref, we have to copy it into the new - * context because the old one is going to get reset. - */ - if (!peraggstate->transValueIsNull) - peraggstate->transValue = datumCopy(peraggstate->transValue, - peraggstate->transtypeByVal, - peraggstate->transtypeLen); return; - } - if (peraggstate->noTransValue) + if (pergroupstate->noTransValue) { /* * transValue has not been initialized. This is the first @@ -253,18 +318,19 @@ advance_transition_function(AggStatePerAgg peraggstate, * is binary-compatible with its transtype, so straight copy * here is OK.) * - * We had better copy the datum if it is pass-by-ref, since the - * given pointer may be pointing into a scan tuple that will - * be freed on the next iteration of the scan. + * We must copy the datum into aggcontext if it is pass-by-ref. + * We do not need to pfree the old transValue, since it's NULL. */ - peraggstate->transValue = datumCopy(newVal, - peraggstate->transtypeByVal, - peraggstate->transtypeLen); - peraggstate->transValueIsNull = false; - peraggstate->noTransValue = false; + oldContext = MemoryContextSwitchTo(aggstate->aggcontext); + pergroupstate->transValue = datumCopy(newVal, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + pergroupstate->transValueIsNull = false; + pergroupstate->noTransValue = false; + MemoryContextSwitchTo(oldContext); return; } - if (peraggstate->transValueIsNull) + if (pergroupstate->transValueIsNull) { /* * Don't call a strict function with NULL inputs. Note it is @@ -277,6 +343,9 @@ advance_transition_function(AggStatePerAgg peraggstate, } } + /* We run the transition functions in per-input-tuple memory context */ + oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory); + /* * OK to call the transition function * @@ -291,84 +360,76 @@ advance_transition_function(AggStatePerAgg peraggstate, fcinfo.flinfo = &peraggstate->transfn; fcinfo.nargs = 2; - fcinfo.arg[0] = peraggstate->transValue; - fcinfo.argnull[0] = peraggstate->transValueIsNull; + fcinfo.arg[0] = pergroupstate->transValue; + fcinfo.argnull[0] = pergroupstate->transValueIsNull; fcinfo.arg[1] = newVal; fcinfo.argnull[1] = isNull; newVal = FunctionCallInvoke(&fcinfo); /* - * If the transition function was uncooperative, it may have given us - * a pass-by-ref result that points at the scan tuple or the - * prior-cycle working memory. Copy it into the active context if it - * doesn't look right. + * If pass-by-ref datatype, must copy the new value into aggcontext and + * pfree the prior transValue. But if transfn returned a pointer to its + * first input, we don't need to do anything. */ - if (!peraggstate->transtypeByVal && !fcinfo.isnull && - !MemoryContextContains(CurrentMemoryContext, - DatumGetPointer(newVal))) - newVal = datumCopy(newVal, - peraggstate->transtypeByVal, - peraggstate->transtypeLen); + if (!peraggstate->transtypeByVal && + DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue)) + { + if (!fcinfo.isnull) + { + MemoryContextSwitchTo(aggstate->aggcontext); + newVal = datumCopy(newVal, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + } + if (!pergroupstate->transValueIsNull) + pfree(DatumGetPointer(pergroupstate->transValue)); + } - peraggstate->transValue = newVal; - peraggstate->transValueIsNull = fcinfo.isnull; + pergroupstate->transValue = newVal; + pergroupstate->transValueIsNull = fcinfo.isnull; + + MemoryContextSwitchTo(oldContext); } /* * Advance all the aggregates for one input tuple. The input tuple - * has been stored in econtext->ecxt_scantuple, so that it is accessible - * to ExecEvalExpr. + * has been stored in tmpcontext->ecxt_scantuple, so that it is accessible + * to ExecEvalExpr. pergroup is the array of per-group structs to use + * (this might be in a hashtable entry). * * When called, CurrentMemoryContext should be the per-query context. */ static void -advance_aggregates(AggState *aggstate, ExprContext *econtext) +advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) { - MemoryContext oldContext; + ExprContext *econtext = aggstate->tmpcontext; int aggno; - /* - * Clear and select the current working context for evaluation - * of the input expressions and transition functions at this - * input tuple. - */ - econtext->ecxt_per_tuple_memory = aggstate->agg_cxt[aggstate->which_cxt]; - ResetExprContext(econtext); - oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); - for (aggno = 0; aggno < aggstate->numaggs; aggno++) { AggStatePerAgg peraggstate = &aggstate->peragg[aggno]; + AggStatePerGroup pergroupstate = &pergroup[aggno]; Aggref *aggref = peraggstate->aggref; Datum newVal; bool isNull; - newVal = ExecEvalExpr(aggref->target, econtext, &isNull, NULL); + newVal = ExecEvalExprSwitchContext(aggref->target, econtext, + &isNull, NULL); if (aggref->aggdistinct) { /* in DISTINCT mode, we may ignore nulls */ if (isNull) continue; - /* putdatum has to be called in per-query context */ - MemoryContextSwitchTo(oldContext); tuplesort_putdatum(peraggstate->sortstate, newVal, isNull); - MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); } else { - advance_transition_function(peraggstate, newVal, isNull); + advance_transition_function(aggstate, peraggstate, pergroupstate, + newVal, isNull); } } - - /* - * Make the other context current so that these transition - * results are preserved. - */ - aggstate->which_cxt = 1 - aggstate->which_cxt; - - MemoryContextSwitchTo(oldContext); } /* @@ -381,10 +442,12 @@ advance_aggregates(AggState *aggstate, ExprContext *econtext) */ static void process_sorted_aggregate(AggState *aggstate, - AggStatePerAgg peraggstate) + AggStatePerAgg peraggstate, + AggStatePerGroup pergroupstate) { Datum oldVal = (Datum) 0; bool haveOldVal = false; + MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory; MemoryContext oldContext; Datum newVal; bool isNull; @@ -408,12 +471,11 @@ process_sorted_aggregate(AggState *aggstate, continue; /* - * Clear and select the current working context for evaluation of + * Clear and select the working context for evaluation of * the equality function and transition function. */ - MemoryContextReset(aggstate->agg_cxt[aggstate->which_cxt]); - oldContext = - MemoryContextSwitchTo(aggstate->agg_cxt[aggstate->which_cxt]); + MemoryContextReset(workcontext); + oldContext = MemoryContextSwitchTo(workcontext); if (haveOldVal && DatumGetBool(FunctionCall2(&peraggstate->equalfn, @@ -422,24 +484,15 @@ process_sorted_aggregate(AggState *aggstate, /* equal to prior, so forget this one */ if (!peraggstate->inputtypeByVal) pfree(DatumGetPointer(newVal)); - - /* - * note we do NOT flip contexts in this case, so no need to - * copy prior transValue to other context. - */ } else { - advance_transition_function(peraggstate, newVal, false); - - /* - * Make the other context current so that this transition - * result is preserved. - */ - aggstate->which_cxt = 1 - aggstate->which_cxt; + advance_transition_function(aggstate, peraggstate, pergroupstate, + newVal, false); /* forget the old value, if any */ if (haveOldVal && !peraggstate->inputtypeByVal) pfree(DatumGetPointer(oldVal)); + /* and remember the new one for subsequent equality checks */ oldVal = newVal; haveOldVal = true; } @@ -457,13 +510,19 @@ process_sorted_aggregate(AggState *aggstate, /* * Compute the final value of one aggregate. * - * When called, CurrentMemoryContext should be the context where we want - * final values delivered (ie, the per-output-tuple expression context). + * The finalfunction will be run, and the result delivered, in the + * output-tuple context; caller's CurrentMemoryContext does not matter. */ static void -finalize_aggregate(AggStatePerAgg peraggstate, +finalize_aggregate(AggState *aggstate, + AggStatePerAgg peraggstate, + AggStatePerGroup pergroupstate, Datum *resultVal, bool *resultIsNull) { + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo(aggstate->csstate.cstate.cs_ExprContext->ecxt_per_tuple_memory); + /* * Apply the agg's finalfn if one is provided, else return transValue. */ @@ -474,9 +533,9 @@ finalize_aggregate(AggStatePerAgg peraggstate, MemSet(&fcinfo, 0, sizeof(fcinfo)); fcinfo.flinfo = &peraggstate->finalfn; fcinfo.nargs = 1; - fcinfo.arg[0] = peraggstate->transValue; - fcinfo.argnull[0] = peraggstate->transValueIsNull; - if (fcinfo.flinfo->fn_strict && peraggstate->transValueIsNull) + fcinfo.arg[0] = pergroupstate->transValue; + fcinfo.argnull[0] = pergroupstate->transValueIsNull; + if (fcinfo.flinfo->fn_strict && pergroupstate->transValueIsNull) { /* don't call a strict function with NULL inputs */ *resultVal = (Datum) 0; @@ -490,8 +549,8 @@ finalize_aggregate(AggStatePerAgg peraggstate, } else { - *resultVal = peraggstate->transValue; - *resultIsNull = peraggstate->transValueIsNull; + *resultVal = pergroupstate->transValue; + *resultIsNull = pergroupstate->transValueIsNull; } /* @@ -503,8 +562,111 @@ finalize_aggregate(AggStatePerAgg peraggstate, *resultVal = datumCopy(*resultVal, peraggstate->resulttypeByVal, peraggstate->resulttypeLen); + + MemoryContextSwitchTo(oldContext); } +/* + * Initialize the hash table to empty. + * + * The hash table always lives in the aggcontext memory context. + */ +static void +build_hash_table(Agg *node) +{ + AggState *aggstate = node->aggstate; + AggHashTable hashtable; + Size tabsize; + + Assert(node->aggstrategy == AGG_HASHED); + Assert(node->numGroups > 0); + tabsize = sizeof(AggHashTableData) + + (node->numGroups - 1) * sizeof(AggHashEntry); + hashtable = (AggHashTable) MemoryContextAlloc(aggstate->aggcontext, + tabsize); + MemSet(hashtable, 0, tabsize); + hashtable->nbuckets = node->numGroups; + aggstate->hashtable = hashtable; +} + +/* + * Find or create a hashtable entry for the tuple group containing the + * given tuple. + * + * When called, CurrentMemoryContext should be the per-query context. + */ +static AggHashEntry +lookup_hash_entry(Agg *node, TupleTableSlot *slot) +{ + AggState *aggstate = node->aggstate; + AggHashTable hashtable = aggstate->hashtable; + MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory; + HeapTuple tuple = slot->val; + TupleDesc tupdesc = slot->ttc_tupleDescriptor; + uint32 hashkey = 0; + int i; + int bucketno; + AggHashEntry entry; + MemoryContext oldContext; + Size entrysize; + + /* Need to run the hash function in short-lived context */ + oldContext = MemoryContextSwitchTo(tmpmem); + + for (i = 0; i < node->numCols; i++) + { + AttrNumber att = node->grpColIdx[i]; + Datum attr; + bool isNull; + + attr = heap_getattr(tuple, att, tupdesc, &isNull); + if (isNull) + continue; /* treat nulls as having hash key 0 */ + hashkey ^= ComputeHashFunc(attr, + (int) tupdesc->attrs[att - 1]->attlen, + tupdesc->attrs[att - 1]->attbyval); + } + bucketno = hashkey % (uint32) hashtable->nbuckets; + + for (entry = hashtable->buckets[bucketno]; + entry != NULL; + entry = entry->next) + { + /* Quick check using hashkey */ + if (entry->hashkey != hashkey) + continue; + if (execTuplesMatch(entry->firstTuple, + tuple, + tupdesc, + node->numCols, node->grpColIdx, + aggstate->eqfunctions, + tmpmem)) + { + MemoryContextSwitchTo(oldContext); + return entry; + } + } + + /* Not there, so build a new one */ + MemoryContextSwitchTo(aggstate->aggcontext); + entrysize = sizeof(AggHashEntryData) + + (aggstate->numaggs - 1) * sizeof(AggStatePerGroupData); + entry = (AggHashEntry) palloc(entrysize); + MemSet(entry, 0, entrysize); + + entry->hashkey = hashkey; + entry->firstTuple = heap_copytuple(tuple); + + entry->next = hashtable->buckets[bucketno]; + hashtable->buckets[bucketno] = entry; + + MemoryContextSwitchTo(oldContext); + + /* initialize aggregates for new tuple group */ + initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup); + + return entry; +} /* * ExecAgg - @@ -521,16 +683,39 @@ finalize_aggregate(AggStatePerAgg peraggstate, */ TupleTableSlot * ExecAgg(Agg *node) +{ + AggState *aggstate = node->aggstate; + + if (aggstate->agg_done) + return NULL; + + if (node->aggstrategy == AGG_HASHED) + { + if (!aggstate->table_filled) + agg_fill_hash_table(node); + return agg_retrieve_hash_table(node); + } + else + { + return agg_retrieve_direct(node); + } +} + +/* + * ExecAgg for non-hashed case + */ +static TupleTableSlot * +agg_retrieve_direct(Agg *node) { AggState *aggstate; - EState *estate; Plan *outerPlan; ExprContext *econtext; + ExprContext *tmpcontext; ProjectionInfo *projInfo; Datum *aggvalues; bool *aggnulls; AggStatePerAgg peragg; - MemoryContext oldContext; + AggStatePerGroup pergroup; TupleTableSlot *outerslot; TupleTableSlot *firstSlot; TupleTableSlot *resultSlot; @@ -540,13 +725,16 @@ ExecAgg(Agg *node) * get state info from node */ aggstate = node->aggstate; - estate = node->plan.state; outerPlan = outerPlan(node); + /* econtext is the per-output-tuple expression context */ econtext = aggstate->csstate.cstate.cs_ExprContext; aggvalues = econtext->ecxt_aggvalues; aggnulls = econtext->ecxt_aggnulls; + /* tmpcontext is the per-input-tuple expression context */ + tmpcontext = aggstate->tmpcontext; projInfo = aggstate->csstate.cstate.cs_ProjInfo; peragg = aggstate->peragg; + pergroup = aggstate->pergroup; firstSlot = aggstate->csstate.css_ScanTupleSlot; /* @@ -586,17 +774,12 @@ ExecAgg(Agg *node) /* * Clear the per-output-tuple context for each group */ - MemoryContextReset(aggstate->tup_cxt); + ResetExprContext(econtext); /* * Initialize working state for a new input tuple group */ - for (aggno = 0; aggno < aggstate->numaggs; aggno++) - { - AggStatePerAgg peraggstate = &peragg[aggno]; - - initialize_aggregate(peraggstate); - } + initialize_aggregates(aggstate, peragg, pergroup); if (aggstate->grp_firstTuple != NULL) { @@ -612,7 +795,7 @@ ExecAgg(Agg *node) aggstate->grp_firstTuple = NULL; /* don't keep two pointers */ /* set up for first advance_aggregates call */ - econtext->ecxt_scantuple = firstSlot; + tmpcontext->ecxt_scantuple = firstSlot; /* * Process each outer-plan tuple, and then fetch the next one, @@ -620,7 +803,10 @@ ExecAgg(Agg *node) */ for (;;) { - advance_aggregates(aggstate, econtext); + advance_aggregates(aggstate, pergroup); + + /* Reset per-input-tuple context after each tuple */ + ResetExprContext(tmpcontext); outerslot = ExecProcNode(outerPlan, (Plan *) node); if (TupIsNull(outerslot)) @@ -630,7 +816,7 @@ ExecAgg(Agg *node) break; } /* set up for next advance_aggregates call */ - econtext->ecxt_scantuple = outerslot; + tmpcontext->ecxt_scantuple = outerslot; /* * If we are grouping, check whether we've crossed a group @@ -643,7 +829,7 @@ ExecAgg(Agg *node) firstSlot->ttc_tupleDescriptor, node->numCols, node->grpColIdx, aggstate->eqfunctions, - aggstate->agg_cxt[aggstate->which_cxt])) + tmpcontext->ecxt_per_tuple_memory)) { /* * Save the first input tuple of the next group. @@ -658,37 +844,17 @@ ExecAgg(Agg *node) /* * Done scanning input tuple group. Finalize each aggregate * calculation, and stash results in the per-output-tuple context. - * - * This is a bit tricky when there are both DISTINCT and plain - * aggregates: we must first finalize all the plain aggs and then - * all the DISTINCT ones. This is needed because the last - * transition values for the plain aggs are stored in the - * not-current working context, and we have to evaluate those aggs - * (and stash the results in the output tup_cxt!) before we start - * flipping contexts again in process_sorted_aggregate. */ - oldContext = MemoryContextSwitchTo(aggstate->tup_cxt); - for (aggno = 0; aggno < aggstate->numaggs; aggno++) - { - AggStatePerAgg peraggstate = &peragg[aggno]; - - if (!peraggstate->aggref->aggdistinct) - finalize_aggregate(peraggstate, - &aggvalues[aggno], &aggnulls[aggno]); - } - MemoryContextSwitchTo(oldContext); for (aggno = 0; aggno < aggstate->numaggs; aggno++) { AggStatePerAgg peraggstate = &peragg[aggno]; + AggStatePerGroup pergroupstate = &pergroup[aggno]; if (peraggstate->aggref->aggdistinct) - { - process_sorted_aggregate(aggstate, peraggstate); - oldContext = MemoryContextSwitchTo(aggstate->tup_cxt); - finalize_aggregate(peraggstate, - &aggvalues[aggno], &aggnulls[aggno]); - MemoryContextSwitchTo(oldContext); - } + process_sorted_aggregate(aggstate, peraggstate, pergroupstate); + + finalize_aggregate(aggstate, peraggstate, pergroupstate, + &aggvalues[aggno], &aggnulls[aggno]); } /* @@ -737,9 +903,158 @@ ExecAgg(Agg *node) } /* - * Do projection and qual check in the per-output-tuple context. + * Form a projection tuple using the aggregate results and the + * representative input tuple. Store it in the result tuple slot. + * Note we do not support aggregates returning sets ... */ - econtext->ecxt_per_tuple_memory = aggstate->tup_cxt; + econtext->ecxt_scantuple = firstSlot; + resultSlot = ExecProject(projInfo, NULL); + + /* + * If the completed tuple does not match the qualifications, it is + * ignored and we loop back to try to process another group. + * Otherwise, return the tuple. + */ + } + while (!ExecQual(node->plan.qual, econtext, false)); + + return resultSlot; +} + +/* + * ExecAgg for hashed case: phase 1, read input and build hash table + */ +static void +agg_fill_hash_table(Agg *node) +{ + AggState *aggstate; + Plan *outerPlan; + ExprContext *tmpcontext; + AggHashEntry entry; + TupleTableSlot *outerslot; + + /* + * get state info from node + */ + aggstate = node->aggstate; + outerPlan = outerPlan(node); + /* tmpcontext is the per-input-tuple expression context */ + tmpcontext = aggstate->tmpcontext; + + /* + * Process each outer-plan tuple, and then fetch the next one, + * until we exhaust the outer plan. + */ + for (;;) + { + outerslot = ExecProcNode(outerPlan, (Plan *) node); + if (TupIsNull(outerslot)) + break; + /* set up for advance_aggregates call */ + tmpcontext->ecxt_scantuple = outerslot; + + /* Find or build hashtable entry for this tuple's group */ + entry = lookup_hash_entry(node, outerslot); + + /* Advance the aggregates */ + advance_aggregates(aggstate, entry->pergroup); + + /* Reset per-input-tuple context after each tuple */ + ResetExprContext(tmpcontext); + } + + aggstate->table_filled = true; + /* Initialize to walk the hash table */ + aggstate->next_hash_entry = NULL; + aggstate->next_hash_bucket = 0; +} + +/* + * ExecAgg for hashed case: phase 2, retrieving groups from hash table + */ +static TupleTableSlot * +agg_retrieve_hash_table(Agg *node) +{ + AggState *aggstate; + ExprContext *econtext; + ProjectionInfo *projInfo; + Datum *aggvalues; + bool *aggnulls; + AggStatePerAgg peragg; + AggStatePerGroup pergroup; + AggHashTable hashtable; + AggHashEntry entry; + TupleTableSlot *firstSlot; + TupleTableSlot *resultSlot; + int aggno; + + /* + * get state info from node + */ + aggstate = node->aggstate; + /* econtext is the per-output-tuple expression context */ + econtext = aggstate->csstate.cstate.cs_ExprContext; + aggvalues = econtext->ecxt_aggvalues; + aggnulls = econtext->ecxt_aggnulls; + projInfo = aggstate->csstate.cstate.cs_ProjInfo; + peragg = aggstate->peragg; + hashtable = aggstate->hashtable; + firstSlot = aggstate->csstate.css_ScanTupleSlot; + + /* + * We loop retrieving groups until we find one matching + * node->plan.qual + */ + do + { + if (aggstate->agg_done) + return NULL; + + /* + * Find the next entry in the hash table + */ + entry = aggstate->next_hash_entry; + while (entry == NULL) + { + if (aggstate->next_hash_bucket >= hashtable->nbuckets) + { + /* No more entries in hashtable, so done */ + aggstate->agg_done = TRUE; + return NULL; + } + entry = hashtable->buckets[aggstate->next_hash_bucket++]; + } + aggstate->next_hash_entry = entry->next; + + /* + * Clear the per-output-tuple context for each group + */ + ResetExprContext(econtext); + + /* + * Store the copied first input tuple in the tuple table slot + * reserved for it, so that it can be used in ExecProject. + */ + ExecStoreTuple(entry->firstTuple, + firstSlot, + InvalidBuffer, + false); + + pergroup = entry->pergroup; + + /* + * Finalize each aggregate calculation, and stash results in the + * per-output-tuple context. + */ + for (aggno = 0; aggno < aggstate->numaggs; aggno++) + { + AggStatePerAgg peraggstate = &peragg[aggno]; + AggStatePerGroup pergroupstate = &pergroup[aggno]; + + Assert(!peraggstate->aggref->aggdistinct); + finalize_aggregate(aggstate, peraggstate, pergroupstate, + &aggvalues[aggno], &aggnulls[aggno]); + } /* * Form a projection tuple using the aggregate results and the @@ -789,8 +1104,11 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent) aggstate = makeNode(AggState); node->aggstate = aggstate; aggstate->eqfunctions = NULL; - aggstate->grp_firstTuple = NULL; + aggstate->peragg = NULL; aggstate->agg_done = false; + aggstate->pergroup = NULL; + aggstate->grp_firstTuple = NULL; + aggstate->hashtable = NULL; /* * find aggregates in targetlist and quals @@ -817,33 +1135,27 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent) } /* - * Create expression context + * Create expression contexts. We need two, one for per-input-tuple + * processing and one for per-output-tuple processing. We cheat a little + * by using ExecAssignExprContext() to build both. */ ExecAssignExprContext(estate, &aggstate->csstate.cstate); + aggstate->tmpcontext = aggstate->csstate.cstate.cs_ExprContext; + ExecAssignExprContext(estate, &aggstate->csstate.cstate); /* - * We actually need three separate expression memory contexts: one for - * calculating per-output-tuple values (ie, the finished aggregate - * results), and two that we ping-pong between for per-input-tuple - * evaluation of input expressions and transition functions. The - * context made by ExecAssignExprContext() is used as the output - * context. + * We also need a long-lived memory context for holding hashtable + * data structures and transition values. NOTE: the details of what + * is stored in aggcontext and what is stored in the regular per-query + * memory context are driven by a simple decision: we want to reset the + * aggcontext in ExecReScanAgg to recover no-longer-wanted space. */ - aggstate->tup_cxt = - aggstate->csstate.cstate.cs_ExprContext->ecxt_per_tuple_memory; - aggstate->agg_cxt[0] = + aggstate->aggcontext = AllocSetContextCreate(CurrentMemoryContext, - "AggExprContext1", + "AggContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); - aggstate->agg_cxt[1] = - AllocSetContextCreate(CurrentMemoryContext, - "AggExprContext2", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - aggstate->which_cxt = 0; #define AGG_NSLOTS 2 @@ -854,7 +1166,7 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent) ExecInitResultTupleSlot(estate, &aggstate->csstate.cstate); /* - * Set up aggregate-result storage in the expr context, and also + * Set up aggregate-result storage in the output expr context, and also * allocate my private per-agg working storage */ econtext = aggstate->csstate.cstate.cs_ExprContext; @@ -867,6 +1179,20 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent) MemSet(peragg, 0, sizeof(AggStatePerAggData) * numaggs); aggstate->peragg = peragg; + if (node->aggstrategy == AGG_HASHED) + { + build_hash_table(node); + aggstate->table_filled = false; + } + else + { + AggStatePerGroup pergroup; + + pergroup = (AggStatePerGroup) palloc(sizeof(AggStatePerGroupData) * numaggs); + MemSet(pergroup, 0, sizeof(AggStatePerGroupData) * numaggs); + aggstate->pergroup = pergroup; + } + /* * initialize child nodes */ @@ -984,12 +1310,15 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent) { /* * Note: use the type from the input expression here, not from - * pg_proc.proargtypes, because the latter might be 0. + * pg_proc.proargtypes, because the latter might be a pseudotype. * (Consider COUNT(*).) */ Oid inputType = exprType(aggref->target); Oid eq_function; + /* We don't implement DISTINCT aggs in the HASHED case */ + Assert(node->aggstrategy != AGG_HASHED); + peraggstate->inputType = inputType; get_typlenbyval(inputType, &peraggstate->inputtypeLen, @@ -1055,21 +1384,27 @@ ExecEndAgg(Agg *node) { AggState *aggstate = node->aggstate; Plan *outerPlan; + int aggno; + + /* Make sure we have closed any open tuplesorts */ + for (aggno = 0; aggno < aggstate->numaggs; aggno++) + { + AggStatePerAgg peraggstate = &aggstate->peragg[aggno]; + + if (peraggstate->sortstate) + tuplesort_end(peraggstate->sortstate); + } ExecFreeProjectionInfo(&aggstate->csstate.cstate); /* - * Make sure ExecFreeExprContext() frees the right expr context... + * Free both the expr contexts. */ - aggstate->csstate.cstate.cs_ExprContext->ecxt_per_tuple_memory = - aggstate->tup_cxt; + ExecFreeExprContext(&aggstate->csstate.cstate); + aggstate->csstate.cstate.cs_ExprContext = aggstate->tmpcontext; ExecFreeExprContext(&aggstate->csstate.cstate); - /* - * ... and I free the others. - */ - MemoryContextDelete(aggstate->agg_cxt[0]); - MemoryContextDelete(aggstate->agg_cxt[1]); + MemoryContextDelete(aggstate->aggcontext); outerPlan = outerPlan(node); ExecEndNode(outerPlan, (Plan *) node); @@ -1088,6 +1423,17 @@ ExecReScanAgg(Agg *node, ExprContext *exprCtxt, Plan *parent) { AggState *aggstate = node->aggstate; ExprContext *econtext = aggstate->csstate.cstate.cs_ExprContext; + int aggno; + + /* Make sure we have closed any open tuplesorts */ + for (aggno = 0; aggno < aggstate->numaggs; aggno++) + { + AggStatePerAgg peraggstate = &aggstate->peragg[aggno]; + + if (peraggstate->sortstate) + tuplesort_end(peraggstate->sortstate); + peraggstate->sortstate = NULL; + } aggstate->agg_done = false; if (aggstate->grp_firstTuple != NULL) @@ -1098,6 +1444,14 @@ ExecReScanAgg(Agg *node, ExprContext *exprCtxt, Plan *parent) MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * aggstate->numaggs); MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * aggstate->numaggs); + MemoryContextReset(aggstate->aggcontext); + + if (node->aggstrategy == AGG_HASHED) + { + build_hash_table(node); + aggstate->table_filled = false; + } + /* * if chgParam of subnode is not null then plan will be re-scanned by * first ExecProcNode. diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index 662c3d4798..3ea0e44d28 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -15,7 +15,7 @@ * locate group boundaries. * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.48 2002/11/06 00:00:43 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.49 2002/11/06 22:31:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -151,9 +151,8 @@ ExecInitGroup(Group *node, EState *estate, Plan *parent) */ grpstate = makeNode(GroupState); node->grpstate = grpstate; - grpstate->grp_useFirstTuple = FALSE; - grpstate->grp_done = FALSE; grpstate->grp_firstTuple = NULL; + grpstate->grp_done = FALSE; /* * create expression context @@ -236,7 +235,6 @@ ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent) { GroupState *grpstate = node->grpstate; - grpstate->grp_useFirstTuple = FALSE; grpstate->grp_done = FALSE; if (grpstate->grp_firstTuple != NULL) { diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 8bb5bde84c..57faf0622c 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -7,7 +7,8 @@ * Portions Copyright (c) 1994, Regents of the University of California * * - * $Id: nodeHash.c,v 1.66 2002/09/04 20:31:18 momjian Exp $ + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.67 2002/11/06 22:31:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,8 +32,6 @@ #include "utils/lsyscache.h" -static uint32 hashFunc(Datum key, int typLen, bool byVal); - /* ---------------------------------------------------------------- * ExecHash * @@ -532,7 +531,7 @@ ExecHashGetBucket(HashJoinTable hashtable, /* * We reset the eval context each time to reclaim any memory leaked in - * the hashkey expression or hashFunc itself. + * the hashkey expression or ComputeHashFunc itself. */ ResetExprContext(econtext); @@ -550,9 +549,9 @@ ExecHashGetBucket(HashJoinTable hashtable, bucketno = 0; else { - bucketno = hashFunc(keyval, - (int) hashtable->typLen, - hashtable->typByVal) + bucketno = ComputeHashFunc(keyval, + (int) hashtable->typLen, + hashtable->typByVal) % (uint32) hashtable->totalbuckets; } @@ -622,16 +621,16 @@ ExecScanHashBucket(HashJoinState *hjstate, } /* ---------------------------------------------------------------- - * hashFunc + * ComputeHashFunc * - * the hash function for hash joins + * the hash function for hash joins (also used for hash aggregation) * * XXX this probably ought to be replaced with datatype-specific * hash functions, such as those already implemented for hash indexes. * ---------------------------------------------------------------- */ -static uint32 -hashFunc(Datum key, int typLen, bool byVal) +uint32 +ComputeHashFunc(Datum key, int typLen, bool byVal) { unsigned char *k; @@ -681,7 +680,7 @@ hashFunc(Datum key, int typLen, bool byVal) } else { - elog(ERROR, "hashFunc: Invalid typLen %d", typLen); + elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen); k = NULL; /* keep compiler quiet */ } } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 0438e0ce60..447d560064 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.215 2002/11/06 00:00:43 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.216 2002/11/06 22:31:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -524,6 +524,7 @@ _copyAgg(Agg *from) memcpy(newnode->grpColIdx, from->grpColIdx, from->numCols * sizeof(AttrNumber)); } + newnode->numGroups = from->numGroups; return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 2d6db222b2..b35763f23d 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -5,7 +5,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.177 2002/11/06 00:00:44 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.178 2002/11/06 22:31:24 tgl Exp $ * * NOTES * Every (plan) node in POSTGRES has an associated "out" routine which @@ -597,8 +597,8 @@ _outAgg(StringInfo str, Agg *node) { appendStringInfo(str, " AGG "); _outPlanInfo(str, (Plan *) node); - appendStringInfo(str, " :aggstrategy %d :numCols %d ", - (int) node->aggstrategy, node->numCols); + appendStringInfo(str, " :aggstrategy %d :numCols %d :numGroups %ld ", + (int) node->aggstrategy, node->numCols, node->numGroups); } static void diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 5a2acbd276..cba1b2027d 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.120 2002/11/06 00:00:44 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.121 2002/11/06 22:31:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1675,6 +1675,7 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy, plan->plan_rows *= 0.1; if (plan->plan_rows < 1) plan->plan_rows = 1; + node->numGroups = (long) plan->plan_rows; } plan->state = (EState *) NULL; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index cc8e7a698d..7e722d6a09 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.126 2002/11/06 00:00:44 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.127 2002/11/06 22:31:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -931,6 +931,7 @@ grouping_planner(Query *parse, double tuple_fraction) AttrNumber *groupColIdx = NULL; Path *cheapest_path; Path *sorted_path; + bool use_hashed_grouping = false; /* Preprocess targetlist in case we are inside an INSERT/UPDATE. */ tlist = preprocess_targetlist(tlist, @@ -1209,6 +1210,29 @@ grouping_planner(Query *parse, double tuple_fraction) group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys); sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys); + /* + * Consider whether we might want to use hashed grouping. + */ + if (parse->groupClause) + { + /* + * Executor doesn't support hashed aggregation with DISTINCT + * aggregates. (Doing so would imply storing *all* the input + * values in the hash table, which seems like a certain loser.) + */ + if (parse->hasAggs && + (contain_distinct_agg_clause((Node *) tlist) || + contain_distinct_agg_clause(parse->havingQual))) + use_hashed_grouping = false; + else + { +#if 0 /* much more to do here */ + /* TEMPORARY HOTWIRE FOR TESTING */ + use_hashed_grouping = true; +#endif + } + } + /* * Select the best path and create a plan to execute it. * @@ -1279,22 +1303,30 @@ grouping_planner(Query *parse, double tuple_fraction) } /* - * If any aggregate is present, insert the Agg node, plus an explicit - * sort if necessary. + * Insert AGG or GROUP node if needed, plus an explicit sort step + * if necessary. * * HAVING clause, if any, becomes qual of the Agg node */ - if (parse->hasAggs) + if (use_hashed_grouping) { + /* Hashed aggregate plan --- no sort needed */ + result_plan = (Plan *) make_agg(tlist, + (List *) parse->havingQual, + AGG_HASHED, + length(parse->groupClause), + groupColIdx, + result_plan); + /* Hashed aggregation produces randomly-ordered results */ + current_pathkeys = NIL; + } + else if (parse->hasAggs) + { + /* Plain aggregate plan --- sort if needed */ AggStrategy aggstrategy; if (parse->groupClause) { - aggstrategy = AGG_SORTED; - /* - * Add an explicit sort if we couldn't make the path come out - * the way the AGG node needs it. - */ if (!pathkeys_contained_in(group_pathkeys, current_pathkeys)) { result_plan = make_groupsortplan(parse, @@ -1303,9 +1335,18 @@ grouping_planner(Query *parse, double tuple_fraction) result_plan); current_pathkeys = group_pathkeys; } + aggstrategy = AGG_SORTED; + /* + * The AGG node will not change the sort ordering of its + * groups, so current_pathkeys describes the result too. + */ } else + { aggstrategy = AGG_PLAIN; + /* Result will be only one row anyway; no sort order */ + current_pathkeys = NIL; + } result_plan = (Plan *) make_agg(tlist, (List *) parse->havingQual, @@ -1313,10 +1354,6 @@ grouping_planner(Query *parse, double tuple_fraction) length(parse->groupClause), groupColIdx, result_plan); - /* - * Note: plain or grouped Agg does not affect any existing - * sort order of the tuples - */ } else { diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index ee03797476..f55c988bfc 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.109 2002/09/11 14:48:54 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.110 2002/11/06 22:31:24 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -46,6 +46,7 @@ typedef struct } check_subplans_for_ungrouped_vars_context; static bool contain_agg_clause_walker(Node *node, void *context); +static bool contain_distinct_agg_clause_walker(Node *node, void *context); static bool pull_agg_clause_walker(Node *node, List **listptr); static bool expression_returns_set_walker(Node *node, void *context); static bool contain_subplans_walker(Node *node, void *context); @@ -410,6 +411,32 @@ contain_agg_clause_walker(Node *node, void *context) return expression_tree_walker(node, contain_agg_clause_walker, context); } +/* + * contain_distinct_agg_clause + * Recursively search for DISTINCT Aggref nodes within a clause. + * + * Returns true if any DISTINCT aggregate found. + */ +bool +contain_distinct_agg_clause(Node *clause) +{ + return contain_distinct_agg_clause_walker(clause, NULL); +} + +static bool +contain_distinct_agg_clause_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + if (((Aggref *) node)->aggdistinct) + return true; /* abort the tree traversal and return + * true */ + } + return expression_tree_walker(node, contain_distinct_agg_clause_walker, context); +} + /* * pull_agg_clause * Recursively pulls all Aggref nodes from an expression tree. diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index aed6bb0cf6..8bea51e8af 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nodeHash.h,v 1.24 2002/06/20 20:29:49 momjian Exp $ + * $Id: nodeHash.h,v 1.25 2002/11/06 22:31:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,5 +36,6 @@ extern void ExecChooseHashTableSize(double ntuples, int tupwidth, int *virtualbuckets, int *physicalbuckets, int *numbatches); +extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal); #endif /* NODEHASH_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 533d296186..f62d1cb815 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: execnodes.h,v 1.76 2002/11/06 00:00:44 tgl Exp $ + * $Id: execnodes.h,v 1.77 2002/11/06 22:31:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -661,12 +661,18 @@ typedef struct MaterialState * * csstate.css_ScanTupleSlot refers to output of underlying plan. * - * Note: the associated ExprContext contains ecxt_aggvalues and ecxt_aggnulls - * arrays, which hold the computed agg values for the current input group - * during evaluation of an Agg node's output tuple(s). + * Note: csstate.cstate.cs_ExprContext contains ecxt_aggvalues and + * ecxt_aggnulls arrays, which hold the computed agg values for the current + * input group during evaluation of an Agg node's output tuple(s). We + * create a second ExprContext, tmpcontext, in which to evaluate input + * expressions and run the aggregate transition functions. * ------------------------- */ -typedef struct AggStatePerAggData *AggStatePerAgg; /* private in nodeAgg.c */ +/* these structs are private in nodeAgg.c: */ +typedef struct AggStatePerAggData *AggStatePerAgg; +typedef struct AggStatePerGroupData *AggStatePerGroup; +typedef struct AggHashEntryData *AggHashEntry; +typedef struct AggHashTableData *AggHashTable; typedef struct AggState { @@ -674,13 +680,18 @@ typedef struct AggState List *aggs; /* all Aggref nodes in targetlist & quals */ int numaggs; /* length of list (could be zero!) */ FmgrInfo *eqfunctions; /* per-grouping-field equality fns */ - HeapTuple grp_firstTuple; /* copy of first tuple of current group */ - AggStatePerAgg peragg; /* per-Aggref working state */ - MemoryContext tup_cxt; /* context for per-output-tuple - * expressions */ - MemoryContext agg_cxt[2]; /* pair of expression eval memory contexts */ - int which_cxt; /* 0 or 1, indicates current agg_cxt */ + AggStatePerAgg peragg; /* per-Aggref information */ + MemoryContext aggcontext; /* memory context for long-lived data */ + ExprContext *tmpcontext; /* econtext for input expressions */ bool agg_done; /* indicates completion of Agg scan */ + /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */ + AggStatePerGroup pergroup; /* per-Aggref-per-group working state */ + HeapTuple grp_firstTuple; /* copy of first tuple of current group */ + /* these fields are used in AGG_HASHED mode: */ + AggHashTable hashtable; /* hash table with one entry per group */ + bool table_filled; /* hash table filled yet? */ + AggHashEntry next_hash_entry; /* next entry in current chain */ + int next_hash_bucket; /* next chain */ } AggState; /* --------------------- @@ -691,9 +702,8 @@ typedef struct GroupState { CommonScanState csstate; /* its first field is NodeTag */ FmgrInfo *eqfunctions; /* per-field lookup data for equality fns */ - bool grp_useFirstTuple; /* first tuple not processed yet */ - bool grp_done; HeapTuple grp_firstTuple; /* copy of first tuple of current group */ + bool grp_done; /* indicates completion of Group scan */ } GroupState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 63c8f20d80..0cf9d0bac9 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: plannodes.h,v 1.59 2002/11/06 00:00:44 tgl Exp $ + * $Id: plannodes.h,v 1.60 2002/11/06 22:31:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -349,6 +349,7 @@ typedef struct Agg AggStrategy aggstrategy; int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ + long numGroups; /* estimated number of groups in input */ AggState *aggstate; } Agg; diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 844e7d9490..1cf8fbaf83 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: clauses.h,v 1.54 2002/09/11 14:48:55 tgl Exp $ + * $Id: clauses.h,v 1.55 2002/11/06 22:31:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -40,6 +40,7 @@ extern Expr *make_ands_explicit(List *andclauses); extern List *make_ands_implicit(Expr *clause); extern bool contain_agg_clause(Node *clause); +extern bool contain_distinct_agg_clause(Node *clause); extern List *pull_agg_clause(Node *clause); extern bool expression_returns_set(Node *clause);