Prevent pushing down WHERE clauses into unsafe UNION/INTERSECT nests.

The planner is aware that it mustn't push down upper-level quals into
subqueries if the quals reference subquery output columns that contain
set-returning functions or volatile functions, or are non-DISTINCT outputs
of a DISTINCT ON subquery.  However, it missed making this check when
there were one or more levels of UNION or INTERSECT above the dangerous
expression.  This could lead to "set-valued function called in context that
cannot accept a set" errors, as seen in bug #8213 from Eric Soroos, or to
silently wrong answers in the other cases.

To fix, refactor the checks so that we make the column-is-unsafe checks
during subquery_is_pushdown_safe(), which already has to recursively
inspect all arms of a set-operation tree.  This makes
qual_is_pushdown_safe() considerably simpler, at the cost that we will
spend some cycles checking output columns that possibly aren't referenced
in any upper qual.  But the cases where this code gets executed at all
are already nontrivial queries, so it's unlikely anybody will notice any
slowdown of planning.

This has been broken since commit 05f916e6ad,
which makes the bug over ten years old.  A bit surprising nobody noticed it
before now.
This commit is contained in:
Tom Lane 2013-06-05 23:44:02 -04:00
parent a3bd6096bd
commit 964c0d0f80
3 changed files with 255 additions and 91 deletions

View File

@ -82,13 +82,14 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte); RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
bool *differentTypes); bool *unsafeColumns);
static bool recurse_pushdown_safe(Node *setOp, Query *topquery, static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
bool *differentTypes); bool *unsafeColumns);
static void check_output_expressions(Query *subquery, bool *unsafeColumns);
static void compare_tlist_datatypes(List *tlist, List *colTypes, static void compare_tlist_datatypes(List *tlist, List *colTypes,
bool *differentTypes); bool *unsafeColumns);
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
bool *differentTypes); bool *unsafeColumns);
static void subquery_push_qual(Query *subquery, static void subquery_push_qual(Query *subquery,
RangeTblEntry *rte, Index rti, Node *qual); RangeTblEntry *rte, Index rti, Node *qual);
static void recurse_push_qual(Node *setOp, Query *topquery, static void recurse_push_qual(Node *setOp, Query *topquery,
@ -1048,7 +1049,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Query *parse = root->parse; Query *parse = root->parse;
Query *subquery = rte->subquery; Query *subquery = rte->subquery;
Relids required_outer; Relids required_outer;
bool *differentTypes; bool *unsafeColumns;
double tuple_fraction; double tuple_fraction;
PlannerInfo *subroot; PlannerInfo *subroot;
List *pathkeys; List *pathkeys;
@ -1067,8 +1068,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
*/ */
required_outer = rel->lateral_relids; required_outer = rel->lateral_relids;
/* We need a workspace for keeping track of set-op type coercions */ /*
differentTypes = (bool *) * We need a workspace for keeping track of unsafe-to-reference columns.
* unsafeColumns[i] is set TRUE if we've found that output column i of the
* subquery is unsafe to use in a pushed-down qual.
*/
unsafeColumns = (bool *)
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool)); palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
/* /*
@ -1096,7 +1101,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
* push down a pushable qual, because it'd result in a worse plan? * push down a pushable qual, because it'd result in a worse plan?
*/ */
if (rel->baserestrictinfo != NIL && if (rel->baserestrictinfo != NIL &&
subquery_is_pushdown_safe(subquery, subquery, differentTypes)) subquery_is_pushdown_safe(subquery, subquery, unsafeColumns))
{ {
/* OK to consider pushing down individual quals */ /* OK to consider pushing down individual quals */
List *upperrestrictlist = NIL; List *upperrestrictlist = NIL;
@ -1110,7 +1115,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
if (!rinfo->pseudoconstant && if (!rinfo->pseudoconstant &&
(!rte->security_barrier || (!rte->security_barrier ||
!contain_leaky_functions(clause)) && !contain_leaky_functions(clause)) &&
qual_is_pushdown_safe(subquery, rti, clause, differentTypes)) qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns))
{ {
/* Push it down */ /* Push it down */
subquery_push_qual(subquery, rte, rti, clause); subquery_push_qual(subquery, rte, rti, clause);
@ -1124,7 +1129,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
rel->baserestrictinfo = upperrestrictlist; rel->baserestrictinfo = upperrestrictlist;
} }
pfree(differentTypes); pfree(unsafeColumns);
/* /*
* We can safely pass the outer tuple_fraction down to the subquery if the * We can safely pass the outer tuple_fraction down to the subquery if the
@ -1553,17 +1558,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push * 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
* quals into it, because that could change the results. * quals into it, because that could change the results.
* *
* 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can * In addition, we make several checks on the subquery's output columns
* push quals into each component query, but the quals can only reference * to see if it is safe to reference them in pushed-down quals. If output
* subquery columns that suffer no type coercions in the set operation. * column k is found to be unsafe to reference, we set unsafeColumns[k] to
* Otherwise there are possible semantic gotchas. So, we check the * TRUE, but we don't reject the subquery overall since column k might
* component queries to see if any of them have different output types; * not be referenced by some/all quals. The unsafeColumns[] array will be
* differentTypes[k] is set true if column k has different type in any * consulted later by qual_is_pushdown_safe(). It's better to do it this
* component. * way than to make the checks directly in qual_is_pushdown_safe(), because
* when the subquery involves set operations we have to check the output
* expressions in each arm of the set op.
*/ */
static bool static bool
subquery_is_pushdown_safe(Query *subquery, Query *topquery, subquery_is_pushdown_safe(Query *subquery, Query *topquery,
bool *differentTypes) bool *unsafeColumns)
{ {
SetOperationStmt *topop; SetOperationStmt *topop;
@ -1575,13 +1582,22 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
if (subquery->hasWindowFuncs) if (subquery->hasWindowFuncs)
return false; return false;
/*
* If we're at a leaf query, check for unsafe expressions in its target
* list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
* setop trees have only simple Vars in their tlists, so no need to check
* them.)
*/
if (subquery->setOperations == NULL)
check_output_expressions(subquery, unsafeColumns);
/* Are we at top level, or looking at a setop component? */ /* Are we at top level, or looking at a setop component? */
if (subquery == topquery) if (subquery == topquery)
{ {
/* Top level, so check any component queries */ /* Top level, so check any component queries */
if (subquery->setOperations != NULL) if (subquery->setOperations != NULL)
if (!recurse_pushdown_safe(subquery->setOperations, topquery, if (!recurse_pushdown_safe(subquery->setOperations, topquery,
differentTypes)) unsafeColumns))
return false; return false;
} }
else else
@ -1594,7 +1610,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
Assert(topop && IsA(topop, SetOperationStmt)); Assert(topop && IsA(topop, SetOperationStmt));
compare_tlist_datatypes(subquery->targetList, compare_tlist_datatypes(subquery->targetList,
topop->colTypes, topop->colTypes,
differentTypes); unsafeColumns);
} }
return true; return true;
} }
@ -1604,7 +1620,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
*/ */
static bool static bool
recurse_pushdown_safe(Node *setOp, Query *topquery, recurse_pushdown_safe(Node *setOp, Query *topquery,
bool *differentTypes) bool *unsafeColumns)
{ {
if (IsA(setOp, RangeTblRef)) if (IsA(setOp, RangeTblRef))
{ {
@ -1613,19 +1629,19 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
Query *subquery = rte->subquery; Query *subquery = rte->subquery;
Assert(subquery != NULL); Assert(subquery != NULL);
return subquery_is_pushdown_safe(subquery, topquery, differentTypes); return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns);
} }
else if (IsA(setOp, SetOperationStmt)) else if (IsA(setOp, SetOperationStmt))
{ {
SetOperationStmt *op = (SetOperationStmt *) setOp; SetOperationStmt *op = (SetOperationStmt *) setOp;
/* EXCEPT is no good */ /* EXCEPT is no good (point 3 for subquery_is_pushdown_safe) */
if (op->op == SETOP_EXCEPT) if (op->op == SETOP_EXCEPT)
return false; return false;
/* Else recurse */ /* Else recurse */
if (!recurse_pushdown_safe(op->larg, topquery, differentTypes)) if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns))
return false; return false;
if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes)) if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns))
return false; return false;
} }
else else
@ -1637,17 +1653,92 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
} }
/* /*
* Compare tlist's datatypes against the list of set-operation result types. * check_output_expressions - check subquery's output expressions for safety
* For any items that are different, mark the appropriate element of *
* differentTypes[] to show that this column will have type conversions. * There are several cases in which it's unsafe to push down an upper-level
* qual if it references a particular output column of a subquery. We check
* each output column of the subquery and set unsafeColumns[k] to TRUE if
* that column is unsafe for a pushed-down qual to reference. The conditions
* checked here are:
*
* 1. We must not push down any quals that refer to subselect outputs that
* return sets, else we'd introduce functions-returning-sets into the
* subquery's WHERE/HAVING quals.
*
* 2. We must not push down any quals that refer to subselect outputs that
* contain volatile functions, for fear of introducing strange results due
* to multiple evaluation of a volatile function.
*
* 3. If the subquery uses DISTINCT ON, we must not push down any quals that
* refer to non-DISTINCT output columns, because that could change the set
* of rows returned. (This condition is vacuous for DISTINCT, because then
* there are no non-DISTINCT output columns, so we needn't check. But note
* we are assuming that the qual can't distinguish values that the DISTINCT
* operator sees as equal. This is a bit shaky but we have no way to test
* for the case, and it's unlikely enough that we shouldn't refuse the
* optimization just because it could theoretically happen.)
*/
static void
check_output_expressions(Query *subquery, bool *unsafeColumns)
{
ListCell *lc;
foreach(lc, subquery->targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
if (tle->resjunk)
continue; /* ignore resjunk columns */
/* We need not check further if output col is already known unsafe */
if (unsafeColumns[tle->resno])
continue;
/* Functions returning sets are unsafe (point 1) */
if (expression_returns_set((Node *) tle->expr))
{
unsafeColumns[tle->resno] = true;
continue;
}
/* Volatile functions are unsafe (point 2) */
if (contain_volatile_functions((Node *) tle->expr))
{
unsafeColumns[tle->resno] = true;
continue;
}
/* If subquery uses DISTINCT ON, check point 3 */
if (subquery->hasDistinctOn &&
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
{
/* non-DISTINCT column, so mark it unsafe */
unsafeColumns[tle->resno] = true;
continue;
}
}
}
/*
* For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
* push quals into each component query, but the quals can only reference
* subquery columns that suffer no type coercions in the set operation.
* Otherwise there are possible semantic gotchas. So, we check the
* component queries to see if any of them have output types different from
* the top-level setop outputs. unsafeColumns[k] is set true if column k
* has different type in any component.
* *
* We don't have to care about typmods here: the only allowed difference * We don't have to care about typmods here: the only allowed difference
* between set-op input and output typmods is input is a specific typmod * between set-op input and output typmods is input is a specific typmod
* and output is -1, and that does not require a coercion. * and output is -1, and that does not require a coercion.
*
* tlist is a subquery tlist.
* colTypes is an OID list of the top-level setop's output column types.
* unsafeColumns[] is the result array.
*/ */
static void static void
compare_tlist_datatypes(List *tlist, List *colTypes, compare_tlist_datatypes(List *tlist, List *colTypes,
bool *differentTypes) bool *unsafeColumns)
{ {
ListCell *l; ListCell *l;
ListCell *colType = list_head(colTypes); ListCell *colType = list_head(colTypes);
@ -1661,7 +1752,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
if (colType == NULL) if (colType == NULL)
elog(ERROR, "wrong number of tlist entries"); elog(ERROR, "wrong number of tlist entries");
if (exprType((Node *) tle->expr) != lfirst_oid(colType)) if (exprType((Node *) tle->expr) != lfirst_oid(colType))
differentTypes[tle->resno] = true; unsafeColumns[tle->resno] = true;
colType = lnext(colType); colType = lnext(colType);
} }
if (colType != NULL) if (colType != NULL)
@ -1684,34 +1775,15 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
* (since there is no easy way to name that within the subquery itself). * (since there is no easy way to name that within the subquery itself).
* *
* 3. The qual must not refer to any subquery output columns that were * 3. The qual must not refer to any subquery output columns that were
* found to have inconsistent types across a set operation tree by * found to be unsafe to reference by subquery_is_pushdown_safe().
* subquery_is_pushdown_safe().
*
* 4. If the subquery uses DISTINCT ON, we must not push down any quals that
* refer to non-DISTINCT output columns, because that could change the set
* of rows returned. (This condition is vacuous for DISTINCT, because then
* there are no non-DISTINCT output columns, so we needn't check. But note
* we are assuming that the qual can't distinguish values that the DISTINCT
* operator sees as equal. This is a bit shaky but we have no way to test
* for the case, and it's unlikely enough that we shouldn't refuse the
* optimization just because it could theoretically happen.)
*
* 5. We must not push down any quals that refer to subselect outputs that
* return sets, else we'd introduce functions-returning-sets into the
* subquery's WHERE/HAVING quals.
*
* 6. We must not push down any quals that refer to subselect outputs that
* contain volatile functions, for fear of introducing strange results due
* to multiple evaluation of a volatile function.
*/ */
static bool static bool
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
bool *differentTypes) bool *unsafeColumns)
{ {
bool safe = true; bool safe = true;
List *vars; List *vars;
ListCell *vl; ListCell *vl;
Bitmapset *tested = NULL;
/* Refuse subselects (point 1) */ /* Refuse subselects (point 1) */
if (contain_subplans(qual)) if (contain_subplans(qual))
@ -1734,7 +1806,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
foreach(vl, vars) foreach(vl, vars)
{ {
Var *var = (Var *) lfirst(vl); Var *var = (Var *) lfirst(vl);
TargetEntry *tle;
/* /*
* XXX Punt if we find any PlaceHolderVars in the restriction clause. * XXX Punt if we find any PlaceHolderVars in the restriction clause.
@ -1750,6 +1821,7 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
} }
Assert(var->varno == rti); Assert(var->varno == rti);
Assert(var->varattno >= 0);
/* Check point 2 */ /* Check point 2 */
if (var->varattno == 0) if (var->varattno == 0)
@ -1758,45 +1830,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
break; break;
} }
/*
* We use a bitmapset to avoid testing the same attno more than once.
* (NB: this only works because subquery outputs can't have negative
* attnos.)
*/
if (bms_is_member(var->varattno, tested))
continue;
tested = bms_add_member(tested, var->varattno);
/* Check point 3 */ /* Check point 3 */
if (differentTypes[var->varattno]) if (unsafeColumns[var->varattno])
{
safe = false;
break;
}
/* Must find the tlist element referenced by the Var */
tle = get_tle_by_resno(subquery->targetList, var->varattno);
Assert(tle != NULL);
Assert(!tle->resjunk);
/* If subquery uses DISTINCT ON, check point 4 */
if (subquery->hasDistinctOn &&
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
{
/* non-DISTINCT column, so fail */
safe = false;
break;
}
/* Refuse functions returning sets (point 5) */
if (expression_returns_set((Node *) tle->expr))
{
safe = false;
break;
}
/* Refuse volatile functions (point 6) */
if (contain_volatile_functions((Node *) tle->expr))
{ {
safe = false; safe = false;
break; break;
@ -1804,7 +1839,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
} }
list_free(vars); list_free(vars);
bms_free(tested);
return safe; return safe;
} }

View File

@ -515,3 +515,91 @@ explain (costs off)
-> Seq Scan on tenk1 b -> Seq Scan on tenk1 b
(2 rows) (2 rows)
-- Test that we push quals into UNION sub-selects only when it's safe
explain (costs off)
SELECT * FROM
(SELECT 1 AS t, 2 AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4;
QUERY PLAN
--------------------------------------------
Unique
-> Sort
Sort Key: (1), (2)
-> Append
-> Result
-> Result
One-Time Filter: false
(7 rows)
SELECT * FROM
(SELECT 1 AS t, 2 AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4;
t | x
---+---
1 | 2
(1 row)
explain (costs off)
SELECT * FROM
(SELECT 1 AS t, generate_series(1,10) AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4
ORDER BY x;
QUERY PLAN
--------------------------------
Sort
Sort Key: ss.x
-> Subquery Scan on ss
Filter: (ss.x < 4)
-> HashAggregate
-> Append
-> Result
-> Result
(8 rows)
SELECT * FROM
(SELECT 1 AS t, generate_series(1,10) AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4
ORDER BY x;
t | x
---+---
1 | 1
1 | 2
1 | 3
(3 rows)
explain (costs off)
SELECT * FROM
(SELECT 1 AS t, (random()*3)::int AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x > 3;
QUERY PLAN
----------------------------------------------------------------------------
Subquery Scan on ss
Filter: (ss.x > 3)
-> Unique
-> Sort
Sort Key: (1), (((random() * 3::double precision))::integer)
-> Append
-> Result
-> Result
(8 rows)
SELECT * FROM
(SELECT 1 AS t, (random()*3)::int AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x > 3;
t | x
---+---
2 | 4
(1 row)

View File

@ -207,3 +207,45 @@ explain (costs off)
UNION ALL UNION ALL
SELECT 2 AS t, * FROM tenk1 b) c SELECT 2 AS t, * FROM tenk1 b) c
WHERE t = 2; WHERE t = 2;
-- Test that we push quals into UNION sub-selects only when it's safe
explain (costs off)
SELECT * FROM
(SELECT 1 AS t, 2 AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4;
SELECT * FROM
(SELECT 1 AS t, 2 AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4;
explain (costs off)
SELECT * FROM
(SELECT 1 AS t, generate_series(1,10) AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4
ORDER BY x;
SELECT * FROM
(SELECT 1 AS t, generate_series(1,10) AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4
ORDER BY x;
explain (costs off)
SELECT * FROM
(SELECT 1 AS t, (random()*3)::int AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x > 3;
SELECT * FROM
(SELECT 1 AS t, (random()*3)::int AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x > 3;