diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 742177f457..b84bbba68c 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -82,13 +82,14 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, - bool *differentTypes); + bool *unsafeColumns); static bool recurse_pushdown_safe(Node *setOp, Query *topquery, - bool *differentTypes); + bool *unsafeColumns); +static void check_output_expressions(Query *subquery, bool *unsafeColumns); static void compare_tlist_datatypes(List *tlist, List *colTypes, - bool *differentTypes); + bool *unsafeColumns); static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, - bool *differentTypes); + bool *unsafeColumns); static void subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual); static void recurse_push_qual(Node *setOp, Query *topquery, @@ -1048,7 +1049,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Query *parse = root->parse; Query *subquery = rte->subquery; Relids required_outer; - bool *differentTypes; + bool *unsafeColumns; double tuple_fraction; PlannerInfo *subroot; List *pathkeys; @@ -1067,8 +1068,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, */ required_outer = rel->lateral_relids; - /* We need a workspace for keeping track of set-op type coercions */ - differentTypes = (bool *) + /* + * We need a workspace for keeping track of unsafe-to-reference columns. + * unsafeColumns[i] is set TRUE if we've found that output column i of the + * subquery is unsafe to use in a pushed-down qual. + */ + unsafeColumns = (bool *) palloc0((list_length(subquery->targetList) + 1) * sizeof(bool)); /* @@ -1096,7 +1101,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, * push down a pushable qual, because it'd result in a worse plan? */ if (rel->baserestrictinfo != NIL && - subquery_is_pushdown_safe(subquery, subquery, differentTypes)) + subquery_is_pushdown_safe(subquery, subquery, unsafeColumns)) { /* OK to consider pushing down individual quals */ List *upperrestrictlist = NIL; @@ -1110,7 +1115,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, if (!rinfo->pseudoconstant && (!rte->security_barrier || !contain_leaky_functions(clause)) && - qual_is_pushdown_safe(subquery, rti, clause, differentTypes)) + qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns)) { /* Push it down */ subquery_push_qual(subquery, rte, rti, clause); @@ -1124,7 +1129,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, rel->baserestrictinfo = upperrestrictlist; } - pfree(differentTypes); + pfree(unsafeColumns); /* * We can safely pass the outer tuple_fraction down to the subquery if the @@ -1553,17 +1558,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) * 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push * quals into it, because that could change the results. * - * 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can - * push quals into each component query, but the quals can only reference - * subquery columns that suffer no type coercions in the set operation. - * Otherwise there are possible semantic gotchas. So, we check the - * component queries to see if any of them have different output types; - * differentTypes[k] is set true if column k has different type in any - * component. + * In addition, we make several checks on the subquery's output columns + * to see if it is safe to reference them in pushed-down quals. If output + * column k is found to be unsafe to reference, we set unsafeColumns[k] to + * TRUE, but we don't reject the subquery overall since column k might + * not be referenced by some/all quals. The unsafeColumns[] array will be + * consulted later by qual_is_pushdown_safe(). It's better to do it this + * way than to make the checks directly in qual_is_pushdown_safe(), because + * when the subquery involves set operations we have to check the output + * expressions in each arm of the set op. */ static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, - bool *differentTypes) + bool *unsafeColumns) { SetOperationStmt *topop; @@ -1575,13 +1582,22 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, if (subquery->hasWindowFuncs) return false; + /* + * If we're at a leaf query, check for unsafe expressions in its target + * list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in + * setop trees have only simple Vars in their tlists, so no need to check + * them.) + */ + if (subquery->setOperations == NULL) + check_output_expressions(subquery, unsafeColumns); + /* Are we at top level, or looking at a setop component? */ if (subquery == topquery) { /* Top level, so check any component queries */ if (subquery->setOperations != NULL) if (!recurse_pushdown_safe(subquery->setOperations, topquery, - differentTypes)) + unsafeColumns)) return false; } else @@ -1594,7 +1610,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, Assert(topop && IsA(topop, SetOperationStmt)); compare_tlist_datatypes(subquery->targetList, topop->colTypes, - differentTypes); + unsafeColumns); } return true; } @@ -1604,7 +1620,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, */ static bool recurse_pushdown_safe(Node *setOp, Query *topquery, - bool *differentTypes) + bool *unsafeColumns) { if (IsA(setOp, RangeTblRef)) { @@ -1613,19 +1629,19 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, Query *subquery = rte->subquery; Assert(subquery != NULL); - return subquery_is_pushdown_safe(subquery, topquery, differentTypes); + return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns); } else if (IsA(setOp, SetOperationStmt)) { SetOperationStmt *op = (SetOperationStmt *) setOp; - /* EXCEPT is no good */ + /* EXCEPT is no good (point 3 for subquery_is_pushdown_safe) */ if (op->op == SETOP_EXCEPT) return false; /* Else recurse */ - if (!recurse_pushdown_safe(op->larg, topquery, differentTypes)) + if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns)) return false; - if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes)) + if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns)) return false; } else @@ -1637,17 +1653,92 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, } /* - * Compare tlist's datatypes against the list of set-operation result types. - * For any items that are different, mark the appropriate element of - * differentTypes[] to show that this column will have type conversions. + * check_output_expressions - check subquery's output expressions for safety + * + * There are several cases in which it's unsafe to push down an upper-level + * qual if it references a particular output column of a subquery. We check + * each output column of the subquery and set unsafeColumns[k] to TRUE if + * that column is unsafe for a pushed-down qual to reference. The conditions + * checked here are: + * + * 1. We must not push down any quals that refer to subselect outputs that + * return sets, else we'd introduce functions-returning-sets into the + * subquery's WHERE/HAVING quals. + * + * 2. We must not push down any quals that refer to subselect outputs that + * contain volatile functions, for fear of introducing strange results due + * to multiple evaluation of a volatile function. + * + * 3. If the subquery uses DISTINCT ON, we must not push down any quals that + * refer to non-DISTINCT output columns, because that could change the set + * of rows returned. (This condition is vacuous for DISTINCT, because then + * there are no non-DISTINCT output columns, so we needn't check. But note + * we are assuming that the qual can't distinguish values that the DISTINCT + * operator sees as equal. This is a bit shaky but we have no way to test + * for the case, and it's unlikely enough that we shouldn't refuse the + * optimization just because it could theoretically happen.) + */ +static void +check_output_expressions(Query *subquery, bool *unsafeColumns) +{ + ListCell *lc; + + foreach(lc, subquery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk) + continue; /* ignore resjunk columns */ + + /* We need not check further if output col is already known unsafe */ + if (unsafeColumns[tle->resno]) + continue; + + /* Functions returning sets are unsafe (point 1) */ + if (expression_returns_set((Node *) tle->expr)) + { + unsafeColumns[tle->resno] = true; + continue; + } + + /* Volatile functions are unsafe (point 2) */ + if (contain_volatile_functions((Node *) tle->expr)) + { + unsafeColumns[tle->resno] = true; + continue; + } + + /* If subquery uses DISTINCT ON, check point 3 */ + if (subquery->hasDistinctOn && + !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) + { + /* non-DISTINCT column, so mark it unsafe */ + unsafeColumns[tle->resno] = true; + continue; + } + } +} + +/* + * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can + * push quals into each component query, but the quals can only reference + * subquery columns that suffer no type coercions in the set operation. + * Otherwise there are possible semantic gotchas. So, we check the + * component queries to see if any of them have output types different from + * the top-level setop outputs. unsafeColumns[k] is set true if column k + * has different type in any component. * * We don't have to care about typmods here: the only allowed difference * between set-op input and output typmods is input is a specific typmod * and output is -1, and that does not require a coercion. + * + * tlist is a subquery tlist. + * colTypes is an OID list of the top-level setop's output column types. + * unsafeColumns[] is the result array. */ static void compare_tlist_datatypes(List *tlist, List *colTypes, - bool *differentTypes) + bool *unsafeColumns) { ListCell *l; ListCell *colType = list_head(colTypes); @@ -1661,7 +1752,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes, if (colType == NULL) elog(ERROR, "wrong number of tlist entries"); if (exprType((Node *) tle->expr) != lfirst_oid(colType)) - differentTypes[tle->resno] = true; + unsafeColumns[tle->resno] = true; colType = lnext(colType); } if (colType != NULL) @@ -1684,34 +1775,15 @@ compare_tlist_datatypes(List *tlist, List *colTypes, * (since there is no easy way to name that within the subquery itself). * * 3. The qual must not refer to any subquery output columns that were - * found to have inconsistent types across a set operation tree by - * subquery_is_pushdown_safe(). - * - * 4. If the subquery uses DISTINCT ON, we must not push down any quals that - * refer to non-DISTINCT output columns, because that could change the set - * of rows returned. (This condition is vacuous for DISTINCT, because then - * there are no non-DISTINCT output columns, so we needn't check. But note - * we are assuming that the qual can't distinguish values that the DISTINCT - * operator sees as equal. This is a bit shaky but we have no way to test - * for the case, and it's unlikely enough that we shouldn't refuse the - * optimization just because it could theoretically happen.) - * - * 5. We must not push down any quals that refer to subselect outputs that - * return sets, else we'd introduce functions-returning-sets into the - * subquery's WHERE/HAVING quals. - * - * 6. We must not push down any quals that refer to subselect outputs that - * contain volatile functions, for fear of introducing strange results due - * to multiple evaluation of a volatile function. + * found to be unsafe to reference by subquery_is_pushdown_safe(). */ static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, - bool *differentTypes) + bool *unsafeColumns) { bool safe = true; List *vars; ListCell *vl; - Bitmapset *tested = NULL; /* Refuse subselects (point 1) */ if (contain_subplans(qual)) @@ -1734,7 +1806,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, foreach(vl, vars) { Var *var = (Var *) lfirst(vl); - TargetEntry *tle; /* * XXX Punt if we find any PlaceHolderVars in the restriction clause. @@ -1750,6 +1821,7 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, } Assert(var->varno == rti); + Assert(var->varattno >= 0); /* Check point 2 */ if (var->varattno == 0) @@ -1758,45 +1830,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, break; } - /* - * We use a bitmapset to avoid testing the same attno more than once. - * (NB: this only works because subquery outputs can't have negative - * attnos.) - */ - if (bms_is_member(var->varattno, tested)) - continue; - tested = bms_add_member(tested, var->varattno); - /* Check point 3 */ - if (differentTypes[var->varattno]) - { - safe = false; - break; - } - - /* Must find the tlist element referenced by the Var */ - tle = get_tle_by_resno(subquery->targetList, var->varattno); - Assert(tle != NULL); - Assert(!tle->resjunk); - - /* If subquery uses DISTINCT ON, check point 4 */ - if (subquery->hasDistinctOn && - !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) - { - /* non-DISTINCT column, so fail */ - safe = false; - break; - } - - /* Refuse functions returning sets (point 5) */ - if (expression_returns_set((Node *) tle->expr)) - { - safe = false; - break; - } - - /* Refuse volatile functions (point 6) */ - if (contain_volatile_functions((Node *) tle->expr)) + if (unsafeColumns[var->varattno]) { safe = false; break; @@ -1804,7 +1839,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, } list_free(vars); - bms_free(tested); return safe; } diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 031be5389e..435f50d050 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -515,3 +515,91 @@ explain (costs off) -> Seq Scan on tenk1 b (2 rows) +-- Test that we push quals into UNION sub-selects only when it's safe +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, 2 AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4; + QUERY PLAN +-------------------------------------------- + Unique + -> Sort + Sort Key: (1), (2) + -> Append + -> Result + -> Result + One-Time Filter: false +(7 rows) + +SELECT * FROM + (SELECT 1 AS t, 2 AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4; + t | x +---+--- + 1 | 2 +(1 row) + +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, generate_series(1,10) AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4 +ORDER BY x; + QUERY PLAN +-------------------------------- + Sort + Sort Key: ss.x + -> Subquery Scan on ss + Filter: (ss.x < 4) + -> HashAggregate + -> Append + -> Result + -> Result +(8 rows) + +SELECT * FROM + (SELECT 1 AS t, generate_series(1,10) AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4 +ORDER BY x; + t | x +---+--- + 1 | 1 + 1 | 2 + 1 | 3 +(3 rows) + +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, (random()*3)::int AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x > 3; + QUERY PLAN +---------------------------------------------------------------------------- + Subquery Scan on ss + Filter: (ss.x > 3) + -> Unique + -> Sort + Sort Key: (1), (((random() * 3::double precision))::integer) + -> Append + -> Result + -> Result +(8 rows) + +SELECT * FROM + (SELECT 1 AS t, (random()*3)::int AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x > 3; + t | x +---+--- + 2 | 4 +(1 row) + diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql index 752ae470f0..f3c9d11382 100644 --- a/src/test/regress/sql/union.sql +++ b/src/test/regress/sql/union.sql @@ -207,3 +207,45 @@ explain (costs off) UNION ALL SELECT 2 AS t, * FROM tenk1 b) c WHERE t = 2; + +-- Test that we push quals into UNION sub-selects only when it's safe +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, 2 AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4; + +SELECT * FROM + (SELECT 1 AS t, 2 AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4; + +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, generate_series(1,10) AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4 +ORDER BY x; + +SELECT * FROM + (SELECT 1 AS t, generate_series(1,10) AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4 +ORDER BY x; + +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, (random()*3)::int AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x > 3; + +SELECT * FROM + (SELECT 1 AS t, (random()*3)::int AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x > 3;