Restructure planning code so that preprocessing of targetlist and quals

to simplify constant expressions and expand SubLink nodes into SubPlans
is done in a separate routine subquery_planner() that calls union_planner().
We formerly did most of this work in query_planner(), but that's the
wrong place because it may never see the real targetlist.  Splitting
union_planner into two routines also allows us to avoid redundant work
when union_planner is invoked recursively for UNION and inheritance
cases.  Upshot is that it is now possible to do something like
select float8(count(*)) / (select count(*) from int4_tbl)  from int4_tbl
group by f1;
which has never worked before.
This commit is contained in:
Tom Lane 2000-03-21 05:12:12 +00:00
parent aafe86d995
commit 3ee8f7e207
8 changed files with 191 additions and 166 deletions

View File

@ -132,14 +132,14 @@ than applying a sort to the cheapest other path).
Optimizer Functions
-------------------
The primary entry point is planner().
planner()
handle inheritance by processing separately
-init_query_planner()
preprocess target list
preprocess qualifications(WHERE)
--query_planner()
simplify constant subexpressions
canonicalize_qual()
set up for recursive handling of subqueries
do final cleanup after planning.
-subquery_planner()
simplify constant expressions
canonicalize qual
Attempt to reduce WHERE clause to either CNF or DNF canonical form.
CNF (top-level-AND) is preferred, since the optimizer can then use
any of the AND subclauses to filter tuples; but quals that are in
@ -147,6 +147,13 @@ planner()
force them to CNF. In pathological cases either transform may expand
the qual unreasonably; so we may have to leave it un-normalized,
thereby reducing the accuracy of selectivity estimates.
process sublinks
convert Vars of outer query levels into Params
--union_planner()
handle unions and inheritance by mutual recursion with prepunion.c routines
preprocess target list
handle GROUP BY, HAVING, aggregates, ORDER BY, DISTINCT
--query_planner()
pull out constants from target list
get a target list that only contains column names, no expressions
if none, then return

View File

@ -3,29 +3,31 @@
* planmain.c
* Routines to plan a single query
*
* What's in a name, anyway? The top-level entry point of the planner/
* optimizer is over in planner.c, not here as you might think from the
* file name. But this is the main code for planning a basic join operation,
* shorn of features like subselects, inheritance, aggregates, grouping,
* and so on. (Those are the things planner.c deals with.)
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.52 2000/02/15 20:49:18 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.53 2000/03/21 05:11:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include <sys/types.h>
#include "postgres.h"
#include <sys/types.h>
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
#include "optimizer/tlist.h"
#include "utils/lsyscache.h"
static Plan *subplanner(Query *root, List *flat_tlist, List *qual,
@ -34,19 +36,15 @@ static Plan *subplanner(Query *root, List *flat_tlist, List *qual,
/*--------------------
* query_planner
* Routine to create a query plan. It does so by first creating a
* subplan for the topmost level of attributes in the query. Then,
* it modifies all target list and qualifications to consider the next
* level of nesting and creates a plan for this modified query by
* recursively calling itself. The two pieces are then merged together
* by creating a result node that indicates which attributes should
* be placed where and any relation level qualifications to be
* satisfied.
* Generate a plan for a basic query, which may involve joins but
* not any fancier features.
*
* tlist is the target list of the query (do NOT use root->targetList!)
* tlist is the target list the query should produce (NOT root->targetList!)
* qual is the qualification of the query (likewise!)
* tuple_fraction is the fraction of tuples we expect will be retrieved
*
* qual must already have been converted to implicit-AND form.
*
* Note: the Query node now also includes a query_pathkeys field, which
* is both an input and an output of query_planner(). The input value
* signals query_planner that the indicated sort order is wanted in the
@ -83,46 +81,6 @@ query_planner(Query *root,
List *var_only_tlist;
Plan *subplan;
/*
* Note: union_planner should already have done constant folding
* in both the tlist and qual, so we don't do it again here
* (indeed, we may be getting a flattened var-only tlist anyway).
*
* Is there any value in re-folding the qual after canonicalize_qual?
*/
/*
* Canonicalize the qual, and convert it to implicit-AND format.
*/
qual = canonicalize_qual((Expr *) qual, true);
#ifdef OPTIMIZER_DEBUG
printf("After canonicalize_qual()\n");
pprint(qual);
#endif
/* Replace uplevel vars with Param nodes */
if (PlannerQueryLevel > 1)
{
tlist = (List *) SS_replace_correlation_vars((Node *) tlist);
qual = (List *) SS_replace_correlation_vars((Node *) qual);
}
/* Expand SubLinks to SubPlans */
if (root->hasSubLinks)
{
tlist = (List *) SS_process_sublinks((Node *) tlist);
qual = (List *) SS_process_sublinks((Node *) qual);
if (root->groupClause != NIL)
{
/*
* Check for ungrouped variables passed to subplans.
* Note we do NOT do this for subplans in WHERE; it's legal
* there because WHERE is evaluated pre-GROUP.
*/
check_subplans_for_ungrouped_vars((Node *) tlist, root, tlist);
}
}
/*
* If the query contains no relation references at all, it must be
* something like "SELECT 2+2;". Build a trivial "Result" plan.
@ -192,16 +150,6 @@ query_planner(Query *root,
}
return subplan;
#ifdef NOT_USED
/*
* Destructively modify the query plan's targetlist to add fjoin lists
* to flatten functions that return sets of base types
*/
subplan->targetlist = generate_fjoin(subplan->targetlist);
#endif
}
/*

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.77 2000/03/14 02:23:15 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.78 2000/03/21 05:12:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -36,6 +36,7 @@
#include "utils/lsyscache.h"
#include "utils/syscache.h"
static List *make_subplanTargetList(Query *parse, List *tlist,
AttrNumber **groupColIdx);
static Plan *make_groupplan(List *group_tlist, bool tuplePerGroup,
@ -59,23 +60,154 @@ planner(Query *parse)
PlannerParamVar = NULL;
PlannerPlanId = 0;
/* this should go away sometime soon */
transformKeySetQuery(parse);
result_plan = union_planner(parse, -1.0 /* default case */);
/* primary planning entry point (may recurse for subplans) */
result_plan = subquery_planner(parse, -1.0 /* default case */);
Assert(PlannerQueryLevel == 1);
/* if top-level query had subqueries, do housekeeping for them */
if (PlannerPlanId > 0)
{
result_plan->initPlan = PlannerInitPlan;
(void) SS_finalize_plan(result_plan);
result_plan->initPlan = PlannerInitPlan;
}
/* executor wants to know total number of Params used overall */
result_plan->nParamExec = length(PlannerParamVar);
/* final cleanup of the plan */
set_plan_references(result_plan);
return result_plan;
}
/*--------------------
* subquery_planner
* Invokes the planner on a subquery. We recurse to here for each
* sub-SELECT found in the query tree.
*
* parse is the querytree produced by the parser & rewriter.
* tuple_fraction is the fraction of tuples we expect will be retrieved.
* tuple_fraction is interpreted as explained for union_planner, below.
*
* Basically, this routine does the stuff that should only be done once
* per Query object. It then calls union_planner, which may be called
* recursively on the same Query node in order to handle UNIONs and/or
* inheritance. subquery_planner is called recursively from subselect.c.
*
* prepunion.c uses an unholy combination of calling union_planner when
* recursing on the primary Query node, or subquery_planner when recursing
* on a UNION'd Query node that hasn't previously been seen by
* subquery_planner. That whole chunk of code needs rewritten from scratch.
*
* Returns a query plan.
*--------------------
*/
Plan *
subquery_planner(Query *parse, double tuple_fraction)
{
/*
* A HAVING clause without aggregates is equivalent to a WHERE clause
* (except it can only refer to grouped fields). If there are no
* aggs anywhere in the query, then we don't want to create an Agg
* plan node, so merge the HAVING condition into WHERE. (We used to
* consider this an error condition, but it seems to be legal SQL.)
*/
if (parse->havingQual != NULL && ! parse->hasAggs)
{
if (parse->qual == NULL)
parse->qual = parse->havingQual;
else
parse->qual = (Node *) make_andclause(lappend(lcons(parse->qual,
NIL),
parse->havingQual));
parse->havingQual = NULL;
}
/*
* Simplify constant expressions in targetlist and quals.
*
* Note that at this point the qual has not yet been converted to
* implicit-AND form, so we can apply eval_const_expressions directly.
* Also note that we need to do this before SS_process_sublinks,
* because that routine inserts bogus "Const" nodes.
*/
parse->targetList = (List *)
eval_const_expressions((Node *) parse->targetList);
parse->qual = eval_const_expressions(parse->qual);
parse->havingQual = eval_const_expressions(parse->havingQual);
/*
* Canonicalize the qual, and convert it to implicit-AND format.
*
* XXX Is there any value in re-applying eval_const_expressions
* after canonicalize_qual?
*/
parse->qual = (Node *) canonicalize_qual((Expr *) parse->qual, true);
#ifdef OPTIMIZER_DEBUG
printf("After canonicalize_qual()\n");
pprint(parse->qual);
#endif
/*
* Ditto for the havingQual
*/
parse->havingQual = (Node *) canonicalize_qual((Expr *) parse->havingQual,
true);
/* Expand SubLinks to SubPlans */
if (parse->hasSubLinks)
{
parse->targetList = (List *)
SS_process_sublinks((Node *) parse->targetList);
parse->qual = SS_process_sublinks(parse->qual);
parse->havingQual = SS_process_sublinks(parse->havingQual);
if (parse->groupClause != NIL)
{
/*
* Check for ungrouped variables passed to subplans.
* Note we do NOT do this for subplans in WHERE; it's legal
* there because WHERE is evaluated pre-GROUP.
*
* An interesting fine point: if we reassigned a HAVING qual
* into WHERE above, then we will accept references to ungrouped
* vars from subplans in the HAVING qual. This is not entirely
* consistent, but it doesn't seem particularly harmful...
*/
check_subplans_for_ungrouped_vars((Node *) parse->targetList,
parse);
check_subplans_for_ungrouped_vars(parse->havingQual, parse);
}
}
/* Replace uplevel vars with Param nodes */
if (PlannerQueryLevel > 1)
{
parse->targetList = (List *)
SS_replace_correlation_vars((Node *) parse->targetList);
parse->qual = SS_replace_correlation_vars(parse->qual);
parse->havingQual = SS_replace_correlation_vars(parse->havingQual);
}
/* Do the main planning (potentially recursive) */
return union_planner(parse, tuple_fraction);
/*
* XXX should any more of union_planner's activity be moved here?
*
* That would take careful study of the interactions with prepunion.c,
* but I suspect it would pay off in simplicity and avoidance of
* wasted cycles.
*/
}
/*--------------------
* union_planner
* Invokes the planner on union-type queries (both regular UNIONs and
@ -111,37 +243,6 @@ union_planner(Query *parse,
List *sort_pathkeys;
Index rt_index;
/*
* A HAVING clause without aggregates is equivalent to a WHERE clause
* (except it can only refer to grouped fields). If there are no
* aggs anywhere in the query, then we don't want to create an Agg
* plan node, so merge the HAVING condition into WHERE. (We used to
* consider this an error condition, but it seems to be legal SQL.)
*/
if (parse->havingQual != NULL && ! parse->hasAggs)
{
if (parse->qual == NULL)
parse->qual = parse->havingQual;
else
parse->qual = (Node *) make_andclause(lappend(lcons(parse->qual,
NIL),
parse->havingQual));
parse->havingQual = NULL;
}
/*
* Simplify constant expressions in targetlist and quals.
*
* Note that at this point the qual has not yet been converted to
* implicit-AND form, so we can apply eval_const_expressions directly.
* Also note that we need to do this before SS_process_sublinks,
* because that routine inserts bogus "Const" nodes.
*/
tlist = (List *) eval_const_expressions((Node *) tlist);
parse->qual = eval_const_expressions(parse->qual);
parse->havingQual = eval_const_expressions(parse->havingQual);
if (parse->unionClause)
{
result_plan = (Plan *) plan_union_queries(parse);
@ -474,32 +575,6 @@ union_planner(Query *parse,
result_plan);
}
/*
* If we have a HAVING clause, do the necessary things with it.
* This code should parallel query_planner()'s initial processing
* of the WHERE clause.
*/
if (parse->havingQual)
{
/* Convert the havingQual to implicit-AND normal form */
parse->havingQual = (Node *)
canonicalize_qual((Expr *) parse->havingQual, true);
/* Replace uplevel Vars with Params */
if (PlannerQueryLevel > 1)
parse->havingQual = SS_replace_correlation_vars(parse->havingQual);
if (parse->hasSubLinks)
{
/* Expand SubLinks to SubPlans */
parse->havingQual = SS_process_sublinks(parse->havingQual);
/* Check for ungrouped variables passed to subplans */
check_subplans_for_ungrouped_vars(parse->havingQual,
parse,
parse->targetList);
}
}
/*
* If aggregate is present, insert the Agg node
*

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.32 2000/03/17 02:36:15 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.33 2000/03/21 05:12:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -163,7 +163,7 @@ make_subplan(SubLink *slink)
else
tuple_fraction = -1.0; /* default behavior */
node->plan = plan = union_planner(subquery, tuple_fraction);
node->plan = plan = subquery_planner(subquery, tuple_fraction);
/*
* Assign subPlan, extParam and locParam to plan nodes. At the moment,

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.46 2000/03/14 23:06:29 thomas Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.47 2000/03/21 05:12:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -149,8 +149,12 @@ plan_union_queries(Query *parse)
{
Query *union_query = lfirst(ulist);
/* use subquery_planner here because the union'd queries
* have not been preprocessed yet. My goodness this is messy...
*/
union_plans = lappend(union_plans,
union_planner(union_query, tuple_fraction));
subquery_planner(union_query,
tuple_fraction));
union_rts = lappend(union_rts, union_query->rtable);
}
}
@ -185,8 +189,11 @@ plan_union_queries(Query *parse)
{
Query *union_all_query = lfirst(ulist);
/* use subquery_planner here because the union'd queries
* have not been preprocessed yet. My goodness this is messy...
*/
union_plans = lappend(union_plans,
union_planner(union_all_query, -1.0));
subquery_planner(union_all_query, -1.0));
union_rts = lappend(union_rts, union_all_query->rtable);
}
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.62 2000/03/19 18:20:38 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.63 2000/03/21 05:12:12 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@ -41,15 +41,10 @@
((Node *) makeConst(BOOLOID, 1, (Datum) (val), \
(isnull), true, false, false))
typedef struct {
Query *query;
List *targetList;
} check_subplans_for_ungrouped_vars_context;
static bool contain_agg_clause_walker(Node *node, void *context);
static bool pull_agg_clause_walker(Node *node, List **listptr);
static bool check_subplans_for_ungrouped_vars_walker(Node *node,
check_subplans_for_ungrouped_vars_context *context);
Query *context);
static int is_single_func(Node *node);
static Node *eval_const_expressions_mutator (Node *node, void *context);
static Expr *simplify_op_or_func(Expr *expr, List *args);
@ -467,30 +462,24 @@ pull_agg_clause_walker(Node *node, List **listptr)
* In most contexts, ungrouped variables will be detected by the parser (see
* parse_agg.c, check_ungrouped_columns()). But that routine currently does
* not check subplans, because the necessary info is not computed until the
* planner runs. So we do it here, after we have processed the subplan.
* This ought to be cleaned up someday.
* planner runs. So we do it here, after we have processed sublinks into
* subplans. This ought to be cleaned up someday.
*
* 'clause' is the expression tree to be searched for subplans.
* 'query' provides the GROUP BY list and range table.
* 'targetList' is the target list that the group clauses refer to.
* (Is it really necessary to pass the tlist separately? Couldn't we
* just use the tlist found in the query node?)
* 'query' provides the GROUP BY list, the target list that the group clauses
* refer to, and the range table.
*/
void
check_subplans_for_ungrouped_vars(Node *clause,
Query *query,
List *targetList)
Query *query)
{
check_subplans_for_ungrouped_vars_context context;
context.query = query;
context.targetList = targetList;
check_subplans_for_ungrouped_vars_walker(clause, &context);
/* No special setup needed; context for walker is just the Query pointer */
check_subplans_for_ungrouped_vars_walker(clause, query);
}
static bool
check_subplans_for_ungrouped_vars_walker(Node *node,
check_subplans_for_ungrouped_vars_context *context)
Query *context)
{
if (node == NULL)
return false;
@ -529,7 +518,7 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
* Else, see if it is a grouping column.
*/
contained_in_group_clause = false;
foreach(gl, context->query->groupClause)
foreach(gl, context->groupClause)
{
GroupClause *gcl = lfirst(gl);
Node *groupexpr;
@ -550,8 +539,8 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
char *attname;
Assert(var->varno > 0 &&
var->varno <= length(context->query->rtable));
rte = rt_fetch(var->varno, context->query->rtable);
var->varno <= length(context->rtable));
rte = rt_fetch(var->varno, context->rtable);
attname = get_attname(rte->relid, var->varattno);
if (! attname)
elog(ERROR, "cache lookup of attribute %d in relation %u failed",

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: clauses.h,v 1.33 2000/01/26 05:58:20 momjian Exp $
* $Id: clauses.h,v 1.34 2000/03/21 05:11:51 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -41,9 +41,7 @@ extern List *make_ands_implicit(Expr *clause);
extern List *pull_constant_clauses(List *quals, List **constantQual);
extern bool contain_agg_clause(Node *clause);
extern List *pull_agg_clause(Node *clause);
extern void check_subplans_for_ungrouped_vars(Node *clause,
Query *query,
List *targetList);
extern void check_subplans_for_ungrouped_vars(Node *clause, Query *query);
extern void clause_get_relids_vars(Node *clause, Relids *relids, List **vars);
extern int NumRelids(Node *clause);

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: planner.h,v 1.14 2000/02/15 20:49:26 tgl Exp $
* $Id: planner.h,v 1.15 2000/03/21 05:11:51 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -21,6 +21,7 @@
#include "nodes/plannodes.h"
extern Plan *planner(Query *parse);
extern Plan *subquery_planner(Query *parse, double tuple_fraction);
extern Plan *union_planner(Query *parse, double tuple_fraction);
extern void pg_checkretval(Oid rettype, List *querytree_list);