diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 6ca70a91f1..83e8d7ec16 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -132,14 +132,14 @@ than applying a sort to the cheapest other path). Optimizer Functions ------------------- +The primary entry point is planner(). + planner() - handle inheritance by processing separately --init_query_planner() - preprocess target list - preprocess qualifications(WHERE) ---query_planner() - simplify constant subexpressions - canonicalize_qual() + set up for recursive handling of subqueries + do final cleanup after planning. +-subquery_planner() + simplify constant expressions + canonicalize qual Attempt to reduce WHERE clause to either CNF or DNF canonical form. CNF (top-level-AND) is preferred, since the optimizer can then use any of the AND subclauses to filter tuples; but quals that are in @@ -147,6 +147,13 @@ planner() force them to CNF. In pathological cases either transform may expand the qual unreasonably; so we may have to leave it un-normalized, thereby reducing the accuracy of selectivity estimates. + process sublinks + convert Vars of outer query levels into Params +--union_planner() + handle unions and inheritance by mutual recursion with prepunion.c routines + preprocess target list + handle GROUP BY, HAVING, aggregates, ORDER BY, DISTINCT +--query_planner() pull out constants from target list get a target list that only contains column names, no expressions if none, then return diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index cfa134a388..acc4eb5d93 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -3,29 +3,31 @@ * planmain.c * Routines to plan a single query * + * What's in a name, anyway? The top-level entry point of the planner/ + * optimizer is over in planner.c, not here as you might think from the + * file name. But this is the main code for planning a basic join operation, + * shorn of features like subselects, inheritance, aggregates, grouping, + * and so on. (Those are the things planner.c deals with.) + * * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.52 2000/02/15 20:49:18 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.53 2000/03/21 05:11:58 tgl Exp $ * *------------------------------------------------------------------------- */ -#include - #include "postgres.h" +#include #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" -#include "optimizer/prep.h" -#include "optimizer/subselect.h" #include "optimizer/tlist.h" -#include "utils/lsyscache.h" static Plan *subplanner(Query *root, List *flat_tlist, List *qual, @@ -34,19 +36,15 @@ static Plan *subplanner(Query *root, List *flat_tlist, List *qual, /*-------------------- * query_planner - * Routine to create a query plan. It does so by first creating a - * subplan for the topmost level of attributes in the query. Then, - * it modifies all target list and qualifications to consider the next - * level of nesting and creates a plan for this modified query by - * recursively calling itself. The two pieces are then merged together - * by creating a result node that indicates which attributes should - * be placed where and any relation level qualifications to be - * satisfied. + * Generate a plan for a basic query, which may involve joins but + * not any fancier features. * - * tlist is the target list of the query (do NOT use root->targetList!) + * tlist is the target list the query should produce (NOT root->targetList!) * qual is the qualification of the query (likewise!) * tuple_fraction is the fraction of tuples we expect will be retrieved * + * qual must already have been converted to implicit-AND form. + * * Note: the Query node now also includes a query_pathkeys field, which * is both an input and an output of query_planner(). The input value * signals query_planner that the indicated sort order is wanted in the @@ -83,46 +81,6 @@ query_planner(Query *root, List *var_only_tlist; Plan *subplan; - /* - * Note: union_planner should already have done constant folding - * in both the tlist and qual, so we don't do it again here - * (indeed, we may be getting a flattened var-only tlist anyway). - * - * Is there any value in re-folding the qual after canonicalize_qual? - */ - - /* - * Canonicalize the qual, and convert it to implicit-AND format. - */ - qual = canonicalize_qual((Expr *) qual, true); -#ifdef OPTIMIZER_DEBUG - printf("After canonicalize_qual()\n"); - pprint(qual); -#endif - - /* Replace uplevel vars with Param nodes */ - if (PlannerQueryLevel > 1) - { - tlist = (List *) SS_replace_correlation_vars((Node *) tlist); - qual = (List *) SS_replace_correlation_vars((Node *) qual); - } - - /* Expand SubLinks to SubPlans */ - if (root->hasSubLinks) - { - tlist = (List *) SS_process_sublinks((Node *) tlist); - qual = (List *) SS_process_sublinks((Node *) qual); - if (root->groupClause != NIL) - { - /* - * Check for ungrouped variables passed to subplans. - * Note we do NOT do this for subplans in WHERE; it's legal - * there because WHERE is evaluated pre-GROUP. - */ - check_subplans_for_ungrouped_vars((Node *) tlist, root, tlist); - } - } - /* * If the query contains no relation references at all, it must be * something like "SELECT 2+2;". Build a trivial "Result" plan. @@ -192,16 +150,6 @@ query_planner(Query *root, } return subplan; - -#ifdef NOT_USED - - /* - * Destructively modify the query plan's targetlist to add fjoin lists - * to flatten functions that return sets of base types - */ - subplan->targetlist = generate_fjoin(subplan->targetlist); -#endif - } /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 3faf0904d3..b8871d5801 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.77 2000/03/14 02:23:15 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.78 2000/03/21 05:12:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,6 +36,7 @@ #include "utils/lsyscache.h" #include "utils/syscache.h" + static List *make_subplanTargetList(Query *parse, List *tlist, AttrNumber **groupColIdx); static Plan *make_groupplan(List *group_tlist, bool tuplePerGroup, @@ -59,23 +60,154 @@ planner(Query *parse) PlannerParamVar = NULL; PlannerPlanId = 0; + /* this should go away sometime soon */ transformKeySetQuery(parse); - result_plan = union_planner(parse, -1.0 /* default case */); + /* primary planning entry point (may recurse for subplans) */ + result_plan = subquery_planner(parse, -1.0 /* default case */); Assert(PlannerQueryLevel == 1); + + /* if top-level query had subqueries, do housekeeping for them */ if (PlannerPlanId > 0) { - result_plan->initPlan = PlannerInitPlan; (void) SS_finalize_plan(result_plan); + result_plan->initPlan = PlannerInitPlan; } + + /* executor wants to know total number of Params used overall */ result_plan->nParamExec = length(PlannerParamVar); + /* final cleanup of the plan */ set_plan_references(result_plan); return result_plan; } + +/*-------------------- + * subquery_planner + * Invokes the planner on a subquery. We recurse to here for each + * sub-SELECT found in the query tree. + * + * parse is the querytree produced by the parser & rewriter. + * tuple_fraction is the fraction of tuples we expect will be retrieved. + * tuple_fraction is interpreted as explained for union_planner, below. + * + * Basically, this routine does the stuff that should only be done once + * per Query object. It then calls union_planner, which may be called + * recursively on the same Query node in order to handle UNIONs and/or + * inheritance. subquery_planner is called recursively from subselect.c. + * + * prepunion.c uses an unholy combination of calling union_planner when + * recursing on the primary Query node, or subquery_planner when recursing + * on a UNION'd Query node that hasn't previously been seen by + * subquery_planner. That whole chunk of code needs rewritten from scratch. + * + * Returns a query plan. + *-------------------- + */ +Plan * +subquery_planner(Query *parse, double tuple_fraction) +{ + /* + * A HAVING clause without aggregates is equivalent to a WHERE clause + * (except it can only refer to grouped fields). If there are no + * aggs anywhere in the query, then we don't want to create an Agg + * plan node, so merge the HAVING condition into WHERE. (We used to + * consider this an error condition, but it seems to be legal SQL.) + */ + if (parse->havingQual != NULL && ! parse->hasAggs) + { + if (parse->qual == NULL) + parse->qual = parse->havingQual; + else + parse->qual = (Node *) make_andclause(lappend(lcons(parse->qual, + NIL), + parse->havingQual)); + parse->havingQual = NULL; + } + + /* + * Simplify constant expressions in targetlist and quals. + * + * Note that at this point the qual has not yet been converted to + * implicit-AND form, so we can apply eval_const_expressions directly. + * Also note that we need to do this before SS_process_sublinks, + * because that routine inserts bogus "Const" nodes. + */ + parse->targetList = (List *) + eval_const_expressions((Node *) parse->targetList); + parse->qual = eval_const_expressions(parse->qual); + parse->havingQual = eval_const_expressions(parse->havingQual); + + /* + * Canonicalize the qual, and convert it to implicit-AND format. + * + * XXX Is there any value in re-applying eval_const_expressions + * after canonicalize_qual? + */ + parse->qual = (Node *) canonicalize_qual((Expr *) parse->qual, true); +#ifdef OPTIMIZER_DEBUG + printf("After canonicalize_qual()\n"); + pprint(parse->qual); +#endif + + /* + * Ditto for the havingQual + */ + parse->havingQual = (Node *) canonicalize_qual((Expr *) parse->havingQual, + true); + + /* Expand SubLinks to SubPlans */ + if (parse->hasSubLinks) + { + parse->targetList = (List *) + SS_process_sublinks((Node *) parse->targetList); + parse->qual = SS_process_sublinks(parse->qual); + parse->havingQual = SS_process_sublinks(parse->havingQual); + + if (parse->groupClause != NIL) + { + /* + * Check for ungrouped variables passed to subplans. + * Note we do NOT do this for subplans in WHERE; it's legal + * there because WHERE is evaluated pre-GROUP. + * + * An interesting fine point: if we reassigned a HAVING qual + * into WHERE above, then we will accept references to ungrouped + * vars from subplans in the HAVING qual. This is not entirely + * consistent, but it doesn't seem particularly harmful... + */ + check_subplans_for_ungrouped_vars((Node *) parse->targetList, + parse); + check_subplans_for_ungrouped_vars(parse->havingQual, parse); + } + } + + /* Replace uplevel vars with Param nodes */ + if (PlannerQueryLevel > 1) + { + parse->targetList = (List *) + SS_replace_correlation_vars((Node *) parse->targetList); + parse->qual = SS_replace_correlation_vars(parse->qual); + parse->havingQual = SS_replace_correlation_vars(parse->havingQual); + } + + /* Do the main planning (potentially recursive) */ + + return union_planner(parse, tuple_fraction); + + /* + * XXX should any more of union_planner's activity be moved here? + * + * That would take careful study of the interactions with prepunion.c, + * but I suspect it would pay off in simplicity and avoidance of + * wasted cycles. + */ +} + + /*-------------------- * union_planner * Invokes the planner on union-type queries (both regular UNIONs and @@ -111,37 +243,6 @@ union_planner(Query *parse, List *sort_pathkeys; Index rt_index; - /* - * A HAVING clause without aggregates is equivalent to a WHERE clause - * (except it can only refer to grouped fields). If there are no - * aggs anywhere in the query, then we don't want to create an Agg - * plan node, so merge the HAVING condition into WHERE. (We used to - * consider this an error condition, but it seems to be legal SQL.) - */ - if (parse->havingQual != NULL && ! parse->hasAggs) - { - if (parse->qual == NULL) - parse->qual = parse->havingQual; - else - parse->qual = (Node *) make_andclause(lappend(lcons(parse->qual, - NIL), - parse->havingQual)); - parse->havingQual = NULL; - } - - /* - * Simplify constant expressions in targetlist and quals. - * - * Note that at this point the qual has not yet been converted to - * implicit-AND form, so we can apply eval_const_expressions directly. - * Also note that we need to do this before SS_process_sublinks, - * because that routine inserts bogus "Const" nodes. - */ - tlist = (List *) eval_const_expressions((Node *) tlist); - parse->qual = eval_const_expressions(parse->qual); - parse->havingQual = eval_const_expressions(parse->havingQual); - - if (parse->unionClause) { result_plan = (Plan *) plan_union_queries(parse); @@ -474,32 +575,6 @@ union_planner(Query *parse, result_plan); } - /* - * If we have a HAVING clause, do the necessary things with it. - * This code should parallel query_planner()'s initial processing - * of the WHERE clause. - */ - if (parse->havingQual) - { - /* Convert the havingQual to implicit-AND normal form */ - parse->havingQual = (Node *) - canonicalize_qual((Expr *) parse->havingQual, true); - - /* Replace uplevel Vars with Params */ - if (PlannerQueryLevel > 1) - parse->havingQual = SS_replace_correlation_vars(parse->havingQual); - - if (parse->hasSubLinks) - { - /* Expand SubLinks to SubPlans */ - parse->havingQual = SS_process_sublinks(parse->havingQual); - /* Check for ungrouped variables passed to subplans */ - check_subplans_for_ungrouped_vars(parse->havingQual, - parse, - parse->targetList); - } - } - /* * If aggregate is present, insert the Agg node * diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index eedac18b20..3928f57836 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.32 2000/03/17 02:36:15 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.33 2000/03/21 05:12:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -163,7 +163,7 @@ make_subplan(SubLink *slink) else tuple_fraction = -1.0; /* default behavior */ - node->plan = plan = union_planner(subquery, tuple_fraction); + node->plan = plan = subquery_planner(subquery, tuple_fraction); /* * Assign subPlan, extParam and locParam to plan nodes. At the moment, diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index c0e09dc6d9..e866a5032f 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.46 2000/03/14 23:06:29 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.47 2000/03/21 05:12:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -149,8 +149,12 @@ plan_union_queries(Query *parse) { Query *union_query = lfirst(ulist); + /* use subquery_planner here because the union'd queries + * have not been preprocessed yet. My goodness this is messy... + */ union_plans = lappend(union_plans, - union_planner(union_query, tuple_fraction)); + subquery_planner(union_query, + tuple_fraction)); union_rts = lappend(union_rts, union_query->rtable); } } @@ -185,8 +189,11 @@ plan_union_queries(Query *parse) { Query *union_all_query = lfirst(ulist); + /* use subquery_planner here because the union'd queries + * have not been preprocessed yet. My goodness this is messy... + */ union_plans = lappend(union_plans, - union_planner(union_all_query, -1.0)); + subquery_planner(union_all_query, -1.0)); union_rts = lappend(union_rts, union_all_query->rtable); } } diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 72f9b1117b..8d4dae486a 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.62 2000/03/19 18:20:38 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.63 2000/03/21 05:12:12 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -41,15 +41,10 @@ ((Node *) makeConst(BOOLOID, 1, (Datum) (val), \ (isnull), true, false, false)) -typedef struct { - Query *query; - List *targetList; -} check_subplans_for_ungrouped_vars_context; - static bool contain_agg_clause_walker(Node *node, void *context); static bool pull_agg_clause_walker(Node *node, List **listptr); static bool check_subplans_for_ungrouped_vars_walker(Node *node, - check_subplans_for_ungrouped_vars_context *context); + Query *context); static int is_single_func(Node *node); static Node *eval_const_expressions_mutator (Node *node, void *context); static Expr *simplify_op_or_func(Expr *expr, List *args); @@ -467,30 +462,24 @@ pull_agg_clause_walker(Node *node, List **listptr) * In most contexts, ungrouped variables will be detected by the parser (see * parse_agg.c, check_ungrouped_columns()). But that routine currently does * not check subplans, because the necessary info is not computed until the - * planner runs. So we do it here, after we have processed the subplan. - * This ought to be cleaned up someday. + * planner runs. So we do it here, after we have processed sublinks into + * subplans. This ought to be cleaned up someday. * * 'clause' is the expression tree to be searched for subplans. - * 'query' provides the GROUP BY list and range table. - * 'targetList' is the target list that the group clauses refer to. - * (Is it really necessary to pass the tlist separately? Couldn't we - * just use the tlist found in the query node?) + * 'query' provides the GROUP BY list, the target list that the group clauses + * refer to, and the range table. */ void check_subplans_for_ungrouped_vars(Node *clause, - Query *query, - List *targetList) + Query *query) { - check_subplans_for_ungrouped_vars_context context; - - context.query = query; - context.targetList = targetList; - check_subplans_for_ungrouped_vars_walker(clause, &context); + /* No special setup needed; context for walker is just the Query pointer */ + check_subplans_for_ungrouped_vars_walker(clause, query); } static bool check_subplans_for_ungrouped_vars_walker(Node *node, - check_subplans_for_ungrouped_vars_context *context) + Query *context) { if (node == NULL) return false; @@ -529,7 +518,7 @@ check_subplans_for_ungrouped_vars_walker(Node *node, * Else, see if it is a grouping column. */ contained_in_group_clause = false; - foreach(gl, context->query->groupClause) + foreach(gl, context->groupClause) { GroupClause *gcl = lfirst(gl); Node *groupexpr; @@ -550,8 +539,8 @@ check_subplans_for_ungrouped_vars_walker(Node *node, char *attname; Assert(var->varno > 0 && - var->varno <= length(context->query->rtable)); - rte = rt_fetch(var->varno, context->query->rtable); + var->varno <= length(context->rtable)); + rte = rt_fetch(var->varno, context->rtable); attname = get_attname(rte->relid, var->varattno); if (! attname) elog(ERROR, "cache lookup of attribute %d in relation %u failed", diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 0bbe15b245..ef2bfeb63b 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: clauses.h,v 1.33 2000/01/26 05:58:20 momjian Exp $ + * $Id: clauses.h,v 1.34 2000/03/21 05:11:51 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -41,9 +41,7 @@ extern List *make_ands_implicit(Expr *clause); extern List *pull_constant_clauses(List *quals, List **constantQual); extern bool contain_agg_clause(Node *clause); extern List *pull_agg_clause(Node *clause); -extern void check_subplans_for_ungrouped_vars(Node *clause, - Query *query, - List *targetList); +extern void check_subplans_for_ungrouped_vars(Node *clause, Query *query); extern void clause_get_relids_vars(Node *clause, Relids *relids, List **vars); extern int NumRelids(Node *clause); diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index c06f41b852..3f4fc2a38e 100644 --- a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: planner.h,v 1.14 2000/02/15 20:49:26 tgl Exp $ + * $Id: planner.h,v 1.15 2000/03/21 05:11:51 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,6 +21,7 @@ #include "nodes/plannodes.h" extern Plan *planner(Query *parse); +extern Plan *subquery_planner(Query *parse, double tuple_fraction); extern Plan *union_planner(Query *parse, double tuple_fraction); extern void pg_checkretval(Oid rettype, List *querytree_list);