mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-27 08:39:28 +08:00
Make GROUP BY work properly for datatypes that only support hashing and not
sorting. The infrastructure for this was all in place already; it's only necessary to fix the planner to not assume that sorting is always an available option.
This commit is contained in:
parent
82a1f09953
commit
ec73b56a31
@ -14,7 +14,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.107 2008/07/31 22:47:56 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.108 2008/08/03 19:10:52 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -288,8 +288,7 @@ query_planner(PlannerInfo *root, List *tlist,
|
||||
* levels of sort --- and, therefore, certainly need to read all the
|
||||
* tuples --- unless ORDER BY is a subset of GROUP BY.
|
||||
*/
|
||||
if (root->group_pathkeys && root->sort_pathkeys &&
|
||||
!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
|
||||
if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
|
||||
tuple_fraction = 0.0;
|
||||
}
|
||||
else if (parse->hasAggs || root->hasHavingQual)
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.237 2008/08/03 19:10:52 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -69,11 +69,12 @@ static double preprocess_limit(PlannerInfo *root,
|
||||
int64 *offset_est, int64 *count_est);
|
||||
static void preprocess_groupclause(PlannerInfo *root);
|
||||
static Oid *extract_grouping_ops(List *groupClause);
|
||||
static bool grouping_is_sortable(List *groupClause);
|
||||
static bool grouping_is_hashable(List *groupClause);
|
||||
static bool choose_hashed_grouping(PlannerInfo *root,
|
||||
double tuple_fraction, double limit_tuples,
|
||||
Path *cheapest_path, Path *sorted_path,
|
||||
Oid *groupOperators, double dNumGroups,
|
||||
AggClauseCounts *agg_counts);
|
||||
double dNumGroups, AggClauseCounts *agg_counts);
|
||||
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
|
||||
AttrNumber **groupColIdx, bool *need_tlist_eval);
|
||||
static void locate_grouping_columns(PlannerInfo *root,
|
||||
@ -839,7 +840,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
List *sub_tlist;
|
||||
List *group_pathkeys;
|
||||
AttrNumber *groupColIdx = NULL;
|
||||
Oid *groupOperators = NULL;
|
||||
bool need_tlist_eval = true;
|
||||
QualCost tlist_cost;
|
||||
Path *cheapest_path;
|
||||
@ -877,11 +877,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
* DISTINCT and ORDER BY requirements. This should be changed
|
||||
* someday, but DISTINCT ON is a bit of a problem ...
|
||||
*/
|
||||
root->group_pathkeys =
|
||||
make_pathkeys_for_sortclauses(root,
|
||||
parse->groupClause,
|
||||
tlist,
|
||||
false);
|
||||
if (parse->groupClause && grouping_is_sortable(parse->groupClause))
|
||||
root->group_pathkeys =
|
||||
make_pathkeys_for_sortclauses(root,
|
||||
parse->groupClause,
|
||||
tlist,
|
||||
false);
|
||||
else
|
||||
root->group_pathkeys = NIL;
|
||||
|
||||
if (list_length(parse->distinctClause) > list_length(parse->sortClause))
|
||||
root->sort_pathkeys =
|
||||
make_pathkeys_for_sortclauses(root,
|
||||
@ -915,12 +919,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
/*
|
||||
* Figure out whether we need a sorted result from query_planner.
|
||||
*
|
||||
* If we have a GROUP BY clause, then we want a result sorted properly
|
||||
* for grouping. Otherwise, if there is an ORDER BY clause, we want
|
||||
* to sort by the ORDER BY clause. (Note: if we have both, and ORDER
|
||||
* BY is a superset of GROUP BY, it would be tempting to request sort
|
||||
* by ORDER BY --- but that might just leave us failing to exploit an
|
||||
* available sort order at all. Needs more thought...)
|
||||
* If we have a sortable GROUP BY clause, then we want a result sorted
|
||||
* properly for grouping. Otherwise, if there is an ORDER BY clause,
|
||||
* we want to sort by the ORDER BY clause. (Note: if we have both, and
|
||||
* ORDER BY is a superset of GROUP BY, it would be tempting to request
|
||||
* sort by ORDER BY --- but that might just leave us failing to
|
||||
* exploit an available sort order at all. Needs more thought...)
|
||||
*/
|
||||
if (root->group_pathkeys)
|
||||
root->query_pathkeys = root->group_pathkeys;
|
||||
@ -942,17 +946,39 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
sort_pathkeys = root->sort_pathkeys;
|
||||
|
||||
/*
|
||||
* If grouping, extract the grouping operators and decide whether we
|
||||
* want to use hashed grouping.
|
||||
* If grouping, decide whether to use sorted or hashed grouping.
|
||||
*/
|
||||
if (parse->groupClause)
|
||||
{
|
||||
groupOperators = extract_grouping_ops(parse->groupClause);
|
||||
use_hashed_grouping =
|
||||
choose_hashed_grouping(root, tuple_fraction, limit_tuples,
|
||||
cheapest_path, sorted_path,
|
||||
groupOperators, dNumGroups,
|
||||
&agg_counts);
|
||||
bool can_hash;
|
||||
bool can_sort;
|
||||
|
||||
/*
|
||||
* Executor doesn't support hashed aggregation with DISTINCT
|
||||
* aggregates. (Doing so would imply storing *all* the input
|
||||
* values in the hash table, which seems like a certain loser.)
|
||||
*/
|
||||
can_hash = (agg_counts.numDistinctAggs == 0 &&
|
||||
grouping_is_hashable(parse->groupClause));
|
||||
can_sort = grouping_is_sortable(parse->groupClause);
|
||||
if (can_hash && can_sort)
|
||||
{
|
||||
/* we have a meaningful choice to make ... */
|
||||
use_hashed_grouping =
|
||||
choose_hashed_grouping(root,
|
||||
tuple_fraction, limit_tuples,
|
||||
cheapest_path, sorted_path,
|
||||
dNumGroups, &agg_counts);
|
||||
}
|
||||
else if (can_hash)
|
||||
use_hashed_grouping = true;
|
||||
else if (can_sort)
|
||||
use_hashed_grouping = false;
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("could not implement GROUP BY"),
|
||||
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
|
||||
|
||||
/* Also convert # groups to long int --- but 'ware overflow! */
|
||||
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
|
||||
@ -1088,7 +1114,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
AGG_HASHED,
|
||||
numGroupCols,
|
||||
groupColIdx,
|
||||
groupOperators,
|
||||
extract_grouping_ops(parse->groupClause),
|
||||
numGroups,
|
||||
agg_counts.numAggs,
|
||||
result_plan);
|
||||
@ -1131,7 +1157,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
aggstrategy,
|
||||
numGroupCols,
|
||||
groupColIdx,
|
||||
groupOperators,
|
||||
extract_grouping_ops(parse->groupClause),
|
||||
numGroups,
|
||||
agg_counts.numAggs,
|
||||
result_plan);
|
||||
@ -1160,7 +1186,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
(List *) parse->havingQual,
|
||||
numGroupCols,
|
||||
groupColIdx,
|
||||
groupOperators,
|
||||
extract_grouping_ops(parse->groupClause),
|
||||
dNumGroups,
|
||||
result_plan);
|
||||
/* The Group node won't change sort ordering */
|
||||
@ -1495,6 +1521,9 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
|
||||
* GROUP BY elements, which could match the sort ordering of other
|
||||
* possible plans (eg an indexscan) and thereby reduce cost. We don't
|
||||
* bother with that, though. Hashed grouping will frequently win anyway.
|
||||
*
|
||||
* Note: we need no comparable processing of the distinctClause because
|
||||
* the parser already enforced that the distinctClause matches ORDER BY.
|
||||
*/
|
||||
static void
|
||||
preprocess_groupclause(PlannerInfo *root)
|
||||
@ -1505,7 +1534,7 @@ preprocess_groupclause(PlannerInfo *root)
|
||||
ListCell *sl;
|
||||
ListCell *gl;
|
||||
|
||||
/* If no ORDER BY, nothing useful to do here anyway */
|
||||
/* If no ORDER BY, nothing useful to do here */
|
||||
if (parse->sortClause == NIL)
|
||||
return;
|
||||
|
||||
@ -1546,7 +1575,8 @@ preprocess_groupclause(PlannerInfo *root)
|
||||
* were able to make a complete match. In other words, we only
|
||||
* rearrange the GROUP BY list if the result is that one list is a
|
||||
* prefix of the other --- otherwise there's no possibility of a
|
||||
* common sort.
|
||||
* common sort. Also, give up if there are any non-sortable GROUP BY
|
||||
* items, since then there's no hope anyway.
|
||||
*/
|
||||
foreach(gl, parse->groupClause)
|
||||
{
|
||||
@ -1556,6 +1586,8 @@ preprocess_groupclause(PlannerInfo *root)
|
||||
continue; /* it matched an ORDER BY item */
|
||||
if (partial_match)
|
||||
return; /* give up, no common sort possible */
|
||||
if (!OidIsValid(gc->sortop))
|
||||
return; /* give up, GROUP BY can't be sorted */
|
||||
new_groupclause = lappend(new_groupclause, gc);
|
||||
}
|
||||
|
||||
@ -1566,7 +1598,7 @@ preprocess_groupclause(PlannerInfo *root)
|
||||
|
||||
/*
|
||||
* extract_grouping_ops - make an array of the equality operator OIDs
|
||||
* for the GROUP BY clause
|
||||
* for a SortGroupClause list
|
||||
*/
|
||||
static Oid *
|
||||
extract_grouping_ops(List *groupClause)
|
||||
@ -1590,15 +1622,59 @@ extract_grouping_ops(List *groupClause)
|
||||
return groupOperators;
|
||||
}
|
||||
|
||||
/*
|
||||
* grouping_is_sortable - is it possible to implement grouping list by sorting?
|
||||
*
|
||||
* This is easy since the parser will have included a sortop if one exists.
|
||||
*/
|
||||
static bool
|
||||
grouping_is_sortable(List *groupClause)
|
||||
{
|
||||
ListCell *glitem;
|
||||
|
||||
foreach(glitem, groupClause)
|
||||
{
|
||||
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
|
||||
|
||||
if (!OidIsValid(groupcl->sortop))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* grouping_is_hashable - is it possible to implement grouping list by hashing?
|
||||
*
|
||||
* We assume hashing is OK if the equality operators are marked oprcanhash.
|
||||
* (If there isn't actually a supporting hash function, the executor will
|
||||
* complain at runtime; but this is a misdeclaration of the operator, not
|
||||
* a system bug.)
|
||||
*/
|
||||
static bool
|
||||
grouping_is_hashable(List *groupClause)
|
||||
{
|
||||
ListCell *glitem;
|
||||
|
||||
foreach(glitem, groupClause)
|
||||
{
|
||||
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
|
||||
|
||||
if (!op_hashjoinable(groupcl->eqop))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* choose_hashed_grouping - should we use hashed grouping?
|
||||
*
|
||||
* Note: this is only applied when both alternatives are actually feasible.
|
||||
*/
|
||||
static bool
|
||||
choose_hashed_grouping(PlannerInfo *root,
|
||||
double tuple_fraction, double limit_tuples,
|
||||
Path *cheapest_path, Path *sorted_path,
|
||||
Oid *groupOperators, double dNumGroups,
|
||||
AggClauseCounts *agg_counts)
|
||||
double dNumGroups, AggClauseCounts *agg_counts)
|
||||
{
|
||||
int numGroupCols = list_length(root->parse->groupClause);
|
||||
double cheapest_path_rows;
|
||||
@ -1607,27 +1683,10 @@ choose_hashed_grouping(PlannerInfo *root,
|
||||
List *current_pathkeys;
|
||||
Path hashed_p;
|
||||
Path sorted_p;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Check can't-do-it conditions, including whether the grouping operators
|
||||
* are hashjoinable. (We assume hashing is OK if they are marked
|
||||
* oprcanhash. If there isn't actually a supporting hash function, the
|
||||
* executor will complain at runtime.)
|
||||
*
|
||||
* Executor doesn't support hashed aggregation with DISTINCT aggregates.
|
||||
* (Doing so would imply storing *all* the input values in the hash table,
|
||||
* which seems like a certain loser.)
|
||||
*/
|
||||
/* Prefer sorting when enable_hashagg is off */
|
||||
if (!enable_hashagg)
|
||||
return false;
|
||||
if (agg_counts->numDistinctAggs != 0)
|
||||
return false;
|
||||
for (i = 0; i < numGroupCols; i++)
|
||||
{
|
||||
if (!op_hashjoinable(groupOperators[i]))
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't do it if it doesn't look like the hashtable will fit into
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.172 2008/08/02 21:32:00 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.173 2008/08/03 19:10:52 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -1351,15 +1351,11 @@ transformGroupClause(ParseState *pstate, List *grouplist,
|
||||
/*
|
||||
* If no match in ORDER BY, just add it to the result using
|
||||
* default sort/group semantics.
|
||||
*
|
||||
* XXX for now, the planner requires groupClause to be sortable,
|
||||
* so we have to insist on that here.
|
||||
*/
|
||||
if (!found)
|
||||
result = addTargetToGroupList(pstate, tle,
|
||||
result, *targetlist,
|
||||
true, /* XXX for now */
|
||||
true);
|
||||
false, true);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
Loading…
Reference in New Issue
Block a user